diff --git a/build.xml b/build.xml
index 344c0d3b9da..3c91d19760a 100644
diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath
index 987ea9ffcca..a5d64f92dcc 100644
diff --git a/dev-tools/idea/lucene/contrib/ant/ant.iml b/dev-tools/idea/lucene/contrib/ant/ant.iml
index 7cabce8a0ad..8987c57a65f 100644
diff --git a/dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml b/dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml
index 7831dd14c14..af2667fdc24 100644
diff --git a/dev-tools/idea/lucene/contrib/db/bdb/bdb.iml b/dev-tools/idea/lucene/contrib/db/bdb/bdb.iml
index 7831dd14c14..af2667fdc24 100644
diff --git a/dev-tools/idea/lucene/contrib/demo/demo.iml b/dev-tools/idea/lucene/contrib/demo/demo.iml
index 85978fc7fb8..76ea82a5f5a 100644
diff --git a/dev-tools/idea/lucene/contrib/highlighter/highlighter.iml b/dev-tools/idea/lucene/contrib/highlighter/highlighter.iml
index b14b49f4cca..844c68e345d 100644
diff --git a/dev-tools/idea/lucene/contrib/instantiated/instantiated.iml b/dev-tools/idea/lucene/contrib/instantiated/instantiated.iml
index 25e1b2e0865..972d2176f6a 100644
diff --git a/dev-tools/idea/lucene/contrib/lucli/lucli.iml b/dev-tools/idea/lucene/contrib/lucli/lucli.iml
index 23d11788ba8..6d61e7ec871 100644
diff --git a/dev-tools/idea/lucene/contrib/memory/memory.iml b/dev-tools/idea/lucene/contrib/memory/memory.iml
index f92c9c012a3..ee5f3e1c905 100644
diff --git a/dev-tools/idea/lucene/contrib/misc/misc.iml b/dev-tools/idea/lucene/contrib/misc/misc.iml
index e4c809e98cd..4333fc5f428 100644
diff --git a/dev-tools/idea/lucene/contrib/queries/queries.iml b/dev-tools/idea/lucene/contrib/queries/queries.iml
index 0f649e6bc80..628ddee51b1 100644
diff --git a/dev-tools/idea/lucene/contrib/queryparser/queryparser.iml b/dev-tools/idea/lucene/contrib/queryparser/queryparser.iml
index 33297bbd24e..49139191784 100644
diff --git a/dev-tools/idea/lucene/contrib/spatial/spatial.iml b/dev-tools/idea/lucene/contrib/spatial/spatial.iml
index 84f9bf02d5d..29d18e1a30f 100644
diff --git a/dev-tools/idea/lucene/contrib/spellchecker/spellchecker.iml b/dev-tools/idea/lucene/contrib/spellchecker/spellchecker.iml
index 4656346a309..5ab8148deb0 100644
diff --git a/dev-tools/idea/lucene/contrib/swing/swing.iml b/dev-tools/idea/lucene/contrib/swing/swing.iml
index df1ade3afe3..a84cc08be50 100644
diff --git a/dev-tools/idea/lucene/contrib/wordnet/wordnet.iml b/dev-tools/idea/lucene/contrib/wordnet/wordnet.iml
index 3a4710441f6..0c142aa88a4 100644
diff --git a/dev-tools/idea/lucene/contrib/xml-query-parser/xml-query-parser.iml b/dev-tools/idea/lucene/contrib/xml-query-parser/xml-query-parser.iml
index 6150f8af9c4..51625efa3fc 100644
diff --git a/dev-tools/idea/modules/analysis/common/common.iml b/dev-tools/idea/modules/analysis/common/common.iml
index 57888ab9140..4882b813b62 100644
diff --git a/dev-tools/idea/modules/analysis/icu/icu.iml b/dev-tools/idea/modules/analysis/icu/icu.iml
index d116f88dab0..a3ed3d531af 100644
diff --git a/dev-tools/idea/modules/analysis/phonetic/phonetic.iml b/dev-tools/idea/modules/analysis/phonetic/phonetic.iml
index 57671e52242..0190e7404eb 100644
diff --git a/dev-tools/idea/modules/analysis/smartcn/smartcn.iml b/dev-tools/idea/modules/analysis/smartcn/smartcn.iml
index e014369cd24..165df39b0aa 100644
diff --git a/dev-tools/idea/modules/analysis/stempel/stempel.iml b/dev-tools/idea/modules/analysis/stempel/stempel.iml
index 0348e3cc1fe..fbea9c0257e 100644
diff --git a/dev-tools/idea/modules/benchmark/benchmark.iml b/dev-tools/idea/modules/benchmark/benchmark.iml
index 783333be8df..16fc06f2267 100644
diff --git a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml index 0cdc269bcd3..0861ae2bace 100644 --- a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml +++ b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml @@ -10,12 +10,6 @@ - - - - - - @@ -30,5 +24,11 @@ + + + + + + diff --git a/dev-tools/idea/solr/contrib/clustering/clustering.iml b/dev-tools/idea/solr/contrib/clustering/clustering.iml index aa2c18fed22..6f9e756d218 100644 --- a/dev-tools/idea/solr/contrib/clustering/clustering.iml +++ b/dev-tools/idea/solr/contrib/clustering/clustering.iml @@ -10,16 +10,6 @@ - - - - - - - - - - @@ -35,5 +25,15 @@ + + + + + + + + + + diff --git a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml b/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml index 49652d0cdee..220ad8ceabd 100644 --- a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml +++ b/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml @@ -11,12 +11,12 @@ - - - + + + diff --git a/dev-tools/idea/solr/contrib/dataimporthandler/src/extras/extras.iml b/dev-tools/idea/solr/contrib/dataimporthandler/src/extras/extras.iml index 7e6986bb709..7f35a8a24cd 100644 --- a/dev-tools/idea/solr/contrib/dataimporthandler/src/extras/extras.iml +++ b/dev-tools/idea/solr/contrib/dataimporthandler/src/extras/extras.iml @@ -9,14 +9,14 @@ - - - + + + diff --git a/dev-tools/idea/solr/contrib/extraction/extraction.iml b/dev-tools/idea/solr/contrib/extraction/extraction.iml index 32bda9774b2..e5493a86550 100644 --- a/dev-tools/idea/solr/contrib/extraction/extraction.iml +++ b/dev-tools/idea/solr/contrib/extraction/extraction.iml @@ -10,11 +10,11 @@ - - + + diff --git a/dev-tools/idea/solr/contrib/uima/uima.iml b/dev-tools/idea/solr/contrib/uima/uima.iml index b1aafa79912..099f6f614c8 100644 --- a/dev-tools/idea/solr/contrib/uima/uima.iml +++ b/dev-tools/idea/solr/contrib/uima/uima.iml @@ -11,8 +11,6 @@ - - @@ -26,5 +24,7 @@ + + diff --git a/dev-tools/idea/solr/solr.iml b/dev-tools/idea/solr/solr.iml index 0a45dd1db1d..6d1951d9a7d 100644 --- a/dev-tools/idea/solr/solr.iml +++ b/dev-tools/idea/solr/solr.iml @@ -12,23 +12,24 @@ + + + + + - + - - - - diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index 1e72a1fdb4d..95987f4c141 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -132,6 +132,11 @@ lucene-icu4j ${project.version} + + com.google.guava + guava + r05 + com.sleepycat berkeleydb diff --git a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template index ee6c8392384..59b0d0acf7e 100644 --- a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template +++ b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template @@ -44,9 +44,8 @@ ${project.groupId} - solr-core + solr-test-framework ${project.version} - test-jar test @@ -85,6 +84,11 @@ junit test + + javax.servlet + servlet-api + test + ${build-directory} @@ -94,7 +98,7 @@ src/test - test-files + src/test-files ../../src/test-files diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml.template b/dev-tools/maven/solr/contrib/clustering/pom.xml.template index 8a10434de47..fd205c1ea57 100644 --- a/dev-tools/maven/solr/contrib/clustering/pom.xml.template +++ b/dev-tools/maven/solr/contrib/clustering/pom.xml.template @@ -44,9 +44,8 @@ ${project.groupId} - solr-core + solr-test-framework 
${project.version} - test-jar test diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template index 20de6cbf843..a9ee1f774b3 100644 --- a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template +++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template @@ -44,9 +44,8 @@ ${project.groupId} - solr-core + solr-test-framework ${project.version} - test-jar test diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template index ba247a354b2..302ac33827d 100644 --- a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template +++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template @@ -43,10 +43,9 @@ ${project.version} - ${project.groupId} - solr-core + org.apache.solr + solr-test-framework ${project.version} - test-jar test diff --git a/dev-tools/maven/solr/contrib/extraction/pom.xml.template b/dev-tools/maven/solr/contrib/extraction/pom.xml.template index e1a2581ff17..da012e5c8cd 100644 --- a/dev-tools/maven/solr/contrib/extraction/pom.xml.template +++ b/dev-tools/maven/solr/contrib/extraction/pom.xml.template @@ -47,9 +47,8 @@ ${project.groupId} - solr-core + solr-test-framework ${project.version} - test-jar test diff --git a/dev-tools/maven/solr/contrib/uima/pom.xml.template b/dev-tools/maven/solr/contrib/uima/pom.xml.template index 6e58ddb464d..62b8a4bad37 100644 --- a/dev-tools/maven/solr/contrib/uima/pom.xml.template +++ b/dev-tools/maven/solr/contrib/uima/pom.xml.template @@ -44,9 +44,8 @@ ${project.groupId} - solr-core + solr-test-framework ${project.version} - test-jar test diff --git a/dev-tools/maven/solr/pom.xml.template b/dev-tools/maven/solr/pom.xml.template index f866ca184d5..1b552ff1c57 100644 --- a/dev-tools/maven/solr/pom.xml.template +++ b/dev-tools/maven/solr/pom.xml.template @@ -35,6 +35,7 @@ src src/solrj src/webapp + src/test-framework contrib diff --git a/dev-tools/maven/solr/src/pom.xml.template b/dev-tools/maven/solr/src/pom.xml.template index 2457615a103..85ddb316d66 100644 --- a/dev-tools/maven/solr/src/pom.xml.template +++ b/dev-tools/maven/solr/src/pom.xml.template @@ -156,6 +156,11 @@ servlet-api provided + + com.google.guava + guava + test + junit junit @@ -197,17 +202,6 @@ - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - org.apache.maven.plugins maven-surefire-plugin @@ -248,6 +242,24 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-test-source + generate-test-sources + + add-test-source + + + + test-framework + + + + + diff --git a/dev-tools/maven/solr/src/solrj/pom.xml.template b/dev-tools/maven/solr/src/solrj/pom.xml.template index a0d67ebbdb9..072e1ef5286 100644 --- a/dev-tools/maven/solr/src/solrj/pom.xml.template +++ b/dev-tools/maven/solr/src/solrj/pom.xml.template @@ -42,12 +42,6 @@ lucene-core ${project.version} - - org.apache.lucene - lucene-test-framework - ${project.version} - test - org.apache.lucene lucene-analyzers-common @@ -88,11 +82,6 @@ org.slf4j slf4j-api - - junit - junit - test - ${build-directory} diff --git a/dev-tools/maven/solr/src/test-framework/pom.xml.template b/dev-tools/maven/solr/src/test-framework/pom.xml.template new file mode 100644 index 00000000000..fc7875766f2 --- /dev/null +++ b/dev-tools/maven/solr/src/test-framework/pom.xml.template @@ -0,0 +1,76 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + 
org.apache.solr + solr-test-framework + jar + Apache Solr Test Framework + Apache Solr Test Framework + + solr/src/test-framework + ../../build + + + + ${project.groupId} + solr-core + ${project.version} + + + org.apache.lucene + lucene-test-framework + ${project.version} + + + junit + junit + + + + ${build-directory} + ${build-directory}/classes/test-framework + . + + + . + + **/*.java + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + + diff --git a/lucene/.cvsignore b/lucene/.cvsignore deleted file mode 100644 index bd8ad3a3a28..00000000000 --- a/lucene/.cvsignore +++ /dev/null @@ -1,5 +0,0 @@ -build -dist -*~ -velocity.log -build.properties diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4784ca1a609..2a9553bd9ae 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -141,6 +141,11 @@ Changes in backwards compatibility policy * LUCENE-2315: AttributeSource's methods for accessing attributes are now final, else its easy to corrupt the internal states. (Uwe Schindler) +* LUCENE-2814: The IndexWriter.flush method no longer takes "boolean + flushDocStores" argument, as we now always flush doc stores (index + files holding stored fields and term vectors) while flushing a + segment. (Mike McCandless) + Changes in Runtime Behavior * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you @@ -157,11 +162,10 @@ Changes in Runtime Behavior * LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler) -* LUCENE-1076: The default merge policy is now able to merge - non-contiguous segments, which means docIDs no longer necessarily - say "in order". If this is a problem then you can use either of the - LogMergePolicy impls, and call setRequireContiguousMerge(true). - (Mike McCandless) +* LUCENE-1076: The default merge policy (TieredMergePolicy) is now + able to merge non-contiguous segments, which means docIDs no longer + necessarily say "in order". If this is a problem then you can use + either of the LogMergePolicy impls. (Mike McCandless) * LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked per IndexWriter session, which resulted in FieldInfos that had the FieldInfo @@ -169,6 +173,70 @@ Changes in Runtime Behavior globally across IndexWriter sessions and persisted into a X.fnx file on successful commit. The corresponding file format changes are backwards- compatible. (Michael Busch, Simon Willnauer) + +* LUCENE-2956, LUCENE-2573, LUCENE-2324, LUCENE-2555: Changes from + DocumentsWriterPerThread: + + - IndexWriter now uses a DocumentsWriter per thread when indexing documents. + Each DocumentsWriterPerThread indexes documents in its own private segment, + and the in memory segments are no longer merged on flush. Instead, each + segment is separately flushed to disk and subsequently merged with normal + segment merging. + + - DocumentsWriterPerThread (DWPT) is now flushed concurrently based on a + FlushPolicy. When a DWPT is flushed, a fresh DWPT is swapped in so that + indexing may continue concurrently with flushing. The selected + DWPT flushes all its RAM resident documents do disk. Note: Segment flushes + don't flush all RAM resident documents but only the documents private to + the DWPT selected for flushing. + + - Flushing is now controlled by FlushPolicy that is called for every add, + update or delete on IndexWriter. 
By default DWPTs are flushed either on + maxBufferedDocs per DWPT or the global active used memory. Once the active + memory exceeds ramBufferSizeMB only the largest DWPT is selected for + flushing and the memory used by this DWPT is subtracted from the active + memory and added to a flushing memory pool, which can lead to temporarily + higher memory usage due to ongoing indexing. + + - IndexWriter now can utilize ramBufferSize > 2048 MB. Each DWPT can address + up to 2048 MB memory such that the ramBufferSize is now bounded by the max + number of DWPT available in the used DocumentsWriterPerThreadPool. + IndexWriter's net memory consumption can grow far beyond the 2048 MB limit if + the application can use all available DWPTs. To prevent a DWPT from + exhausting its address space IndexWriter will forcefully flush a DWPT if its + hard memory limit is exceeded. The RAMPerThreadHardLimitMB can be controlled + via IndexWriterConfig and defaults to 1945 MB. + Since IndexWriter flushes DWPT concurrently not all memory is released + immediately. Applications should still use a ramBufferSize significantly + lower than the JVM's available heap memory since under high load multiple + flushing DWPT can consume substantial transient memory when IO performance + is slow relative to indexing rate. + + - IndexWriter#commit now doesn't block concurrent indexing while flushing all + 'currently' RAM resident documents to disk. Yet, flushes that occur while a + full flush is running are queued and will happen after all DWPT involved + in the full flush are done flushing. Applications using multiple threads + during indexing that trigger a full flush (e.g. call commit() or open a new + NRT reader) can use significantly more transient memory. + + - IndexWriter#addDocument and IndexWriter.updateDocument can block indexing + threads if the number of active + number of flushing DWPT exceed a + safety limit. By default this happens if 2 * max number available thread + states (DWPTPool) is exceeded. This safety limit prevents applications from + exhausting their available memory if flushing can't keep up with + concurrently indexing threads. + + - IndexWriter only applies and flushes deletes if the maxBufferedDelTerms + limit is reached during indexing. No segment flushes will be triggered + due to this setting. + + - IndexWriter#flush(boolean, boolean) doesn't synchronize on IndexWriter + anymore. A dedicated flushLock has been introduced to prevent multiple full- + flushes happening concurrently. + + - DocumentsWriter doesn't write shared doc stores anymore. + + (Mike McCandless, Michael Busch, Simon Willnauer) API Changes @@ -334,6 +402,16 @@ New features * LUCENE-2862: Added TermsEnum.totalTermFreq() and Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir) +* LUCENE-3001: Added TrieFieldHelper to write solr compatible numeric + fields without the solr dependency. (ryan) + +* LUCENE-3003: Added new expert class oal.index.DocTermsOrd, + refactored from Solr's UnInvertedField, for accessing term ords for + multi-valued fields, per document. This is similar to FieldCache in + that it inverts the index to compute the ords, but differs in that + it's able to handle multi-valued fields and does not hold the term + bytes in RAM. (Mike McCandless) + Optimizations * LUCENE-2588: Don't store unnecessary suffixes when writing the terms @@ -366,6 +444,21 @@ Bug fixes * LUCENE-2936: PhraseQuery score explanations were not correctly identifying matches vs non-matches. (hossman)
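For reference, a minimal sketch (not part of this patch) of how the DocumentsWriterPerThread flush settings described in the LUCENE-2956 notes above might be configured; it assumes setRAMPerThreadHardLimitMB is the IndexWriterConfig setter corresponding to the RAMPerThreadHardLimitMB value mentioned there, and the 64 MB buffer is an arbitrary example value:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class DwptConfigSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
            new StandardAnalyzer(Version.LUCENE_CURRENT));
        // Global trigger: once active RAM across all DWPTs exceeds this value,
        // the largest DWPT is selected for flushing.
        conf.setRAMBufferSizeMB(64.0);
        // Per-DWPT hard limit; a DWPT exceeding it is forcefully flushed
        // (1945 MB is the default noted above; the setter name is assumed).
        conf.setRAMPerThreadHardLimitMB(1945);
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.close();
        dir.close();
      }
    }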
+* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new + indexes, causing existing deletions to be applied on the incoming indexes as + well. (Shai Erera, Mike McCandless) + +Test Cases + +* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing tests to + stop iterating if at least 'tests.iter.min' ran and a failure occurred. + (Shai Erera, Chris Hostetter) + +Build + +* LUCENE-3006: Building javadocs will fail on warnings by default. + Override with -Dfailonjavadocwarning=false (sarowe, gsingers) + ======================= Lucene 3.x (not yet released) ======================= Changes in backwards compatibility policy @@ -380,7 +473,21 @@ Optimizations * LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early on empty or one-element lists/arrays. (Uwe Schindler) -======================= Lucene 3.1 (not yet released) ======================= +Bug fixes + +* LUCENE-3024: Index with more than 2.1B terms was hitting AIOOBE when + seeking TermEnum (e.g. used by Solr's faceting) (Tom Burton-West, Mike + McCandless) + +* LUCENE-3042: When a filter or consumer added Attributes to a TokenStream + chain after it was already (partly) consumed [or clearAttributes(), + captureState(), cloneAttributes(),... was called by the Tokenizer], + the Tokenizer calling clearAttributes() or capturing state after addition + may not do this on the newly added Attribute. This bug affected only + very special use cases of the TokenStream API; most users would not + have recognized it. (Uwe Schindler, Robert Muir) + +======================= Lucene 3.1.0 ======================= Changes in backwards compatibility policy @@ -396,7 +503,7 @@ Changes in backwards compatibility policy * LUCENE-2190: Removed deprecated customScore() and customExplain() methods from experimental CustomScoreQuery. (Uwe Schindler) - + * LUCENE-2286: Enabled DefaultSimilarity.setDiscountOverlaps by default. This means that terms with a position increment gap of zero do not affect the norms calculation by default. (Robert Muir) @@ -434,10 +541,10 @@ Changes in backwards compatibility policy actual file's length if the file exists, and throws FileNotFoundException otherwise. Returning length=0 for a non-existent file is no longer allowed. If you relied on that, make sure to catch the exception. (Shai Erera) - + * LUCENE-2386: IndexWriter no longer performs an empty commit upon new index creation. Previously, if you passed an empty Directory and set OpenMode to - CREATE*, IndexWriter would make a first empty commit. If you need that + CREATE*, IndexWriter would make a first empty commit. If you need that behavior you can call writer.commit()/close() immediately after you create it. (Shai Erera, Mike McCandless) @@ -453,10 +560,10 @@ Changes in backwards compatibility policy values in multi-valued field has been changed for some cases in index. If you index empty fields and use positions/offsets information on those fields, reindex is recommended. (David Smiley, Koji Sekiguchi) - + * LUCENE-2804: Directory.setLockFactory now declares throwing an IOException. (Shai Erera, Robert Muir) - + * LUCENE-2837: Added deprecations noting that in 4.0, Searcher and Searchable are collapsed into IndexSearcher; contrib/remote and MultiSearcher have been removed. (Mike McCandless) @@ -483,7 +590,7 @@ Changes in runtime behavior * LUCENE-2179: CharArraySet.clear() is now functional.
(Robert Muir, Uwe Schindler) -* LUCENE-2455: IndexWriter.addIndexes no longer optimizes the target index +* LUCENE-2455: IndexWriter.addIndexes no longer optimizes the target index before it adds the new ones. Also, the existing segments are not merged and so the index will not end up with a single segment (unless it was empty before). In addition, addIndexesNoOptimize was renamed to addIndexes and no longer @@ -502,9 +609,9 @@ Changes in runtime behavior usage, allowing applications to accidentally open two writers on the same directory. (Mike McCandless) -* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on - LogMergePolicy now affect optimize() as well (as opposed to only regular - merges). This means that you can run optimize() and too large segments won't +* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on + LogMergePolicy now affect optimize() as well (as opposed to only regular + merges). This means that you can run optimize() and too large segments won't be merged. (Shai Erera) * LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List, @@ -514,9 +621,9 @@ Changes in runtime behavior the IndexSearcher search methods that take an int nDocs will now throw IllegalArgumentException if nDocs is 0. Instead, you should use the newly added TotalHitCountCollector. (Mike McCandless) - -* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio - to determine whether the passed in segment should be compound. + +* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio + to determine whether the passed in segment should be compound. (Shai Erera, Earwin Burrfoot) * LUCENE-2805: IndexWriter now increments the index version on every change to @@ -536,7 +643,7 @@ Changes in runtime behavior * LUCENE-2010: Segments with 100% deleted documents are now removed on IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) - + * LUCENE-2960: Allow some changes to IndexWriterConfig to take effect "live" (after an IW is instantiated), via IndexWriter.getConfig().setXXX(...) (Shay Banon, Mike McCandless) @@ -554,7 +661,7 @@ API Changes * LUCENE-2103: NoLockFactory should have a private constructor; until Lucene 4.0 the default one will be deprecated. - (Shai Erera via Uwe Schindler) + (Shai Erera via Uwe Schindler) * LUCENE-2177: Deprecate the Field ctors that take byte[] and Store. Since the removal of compressed fields, Store can only be YES, so @@ -574,30 +681,30 @@ API Changes files are no longer open by IndexReaders. (luocanrao via Mike McCandless) -* LUCENE-2282: IndexFileNames is exposed as a public class allowing for easier - use by external code. In addition it offers a matchExtension method which +* LUCENE-2282: IndexFileNames is exposed as a public class allowing for easier + use by external code. In addition it offers a matchExtension method which callers can use to query whether a certain file matches a certain extension. - (Shai Erera via Mike McCandless) + (Shai Erera via Mike McCandless) * LUCENE-124: Add a TopTermsBoostOnlyBooleanQueryRewrite to MultiTermQuery. This rewrite method is similar to TopTermsScoringBooleanQueryRewrite, but - only scores terms by their boost values. For example, this can be used - with FuzzyQuery to ensure that exact matches are always scored higher, + only scores terms by their boost values. For example, this can be used + with FuzzyQuery to ensure that exact matches are always scored higher, because only the boost will be used in scoring. 
(Robert Muir) - -* LUCENE-2015: Add a static method foldToASCII to ASCIIFoldingFilter to + +* LUCENE-2015: Add a static method foldToASCII to ASCIIFoldingFilter to expose its folding logic. (Cédrik Lime via Robert Muir) - -* LUCENE-2294: IndexWriter constructors have been deprecated in favor of a + +* LUCENE-2294: IndexWriter constructors have been deprecated in favor of a single ctor which accepts IndexWriterConfig and a Directory. You can set all - the parameters related to IndexWriter on IndexWriterConfig. The different - setter/getter methods were deprecated as well. One should call + the parameters related to IndexWriter on IndexWriterConfig. The different + setter/getter methods were deprecated as well. One should call writer.getConfig().getXYZ() to query for a parameter XYZ. - Additionally, the setter/getter related to MergePolicy were deprecated as + Additionally, the setter/getter related to MergePolicy were deprecated as well. One should interact with the MergePolicy directly. (Shai Erera via Mike McCandless) - -* LUCENE-2320: IndexWriter's MergePolicy configuration was moved to + +* LUCENE-2320: IndexWriter's MergePolicy configuration was moved to IndexWriterConfig and the respective methods on IndexWriter were deprecated. (Shai Erera via Mike McCandless) @@ -621,14 +728,14 @@ API Changes * LUCENE-2402: IndexWriter.deleteUnusedFiles now deletes unreferenced commit points too. If you use an IndexDeletionPolicy which holds onto index commits (such as SnapshotDeletionPolicy), you can call this method to remove those - commit points when they are not needed anymore (instead of waiting for the + commit points when they are not needed anymore (instead of waiting for the next commit). (Shai Erera) - + * LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced with equivalent ones that take a String (id) as argument. You can pass - whatever ID you want, as long as you use the same one when calling both. + whatever ID you want, as long as you use the same one when calling both. (Shai Erera) - + * LUCENE-2356: Add IndexWriterConfig.set/getReaderTermIndexDivisor, to set what IndexWriter passes for termsIndexDivisor to the readers it opens internally when apply deletions or creating a near-real-time @@ -638,7 +745,7 @@ API Changes in common/standard/ now implement the Word Break rules from the Unicode 6.0.0 Text Segmentation algorithm (UAX#29), covering the full range of Unicode code points, including values from U+FFFF to U+10FFFF - + ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/ Analyzer implementation and behavior. Only the Unicode Basic Multilingual Plane (code points from U+0000 to U+FFFF) is covered. @@ -646,16 +753,16 @@ API Changes UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the relevant RFCs, in addition to implementing the UAX#29 Word Break rules. (Steven Rowe, Robert Muir, Uwe Schindler) - + * LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override and return a different RAMFile implementation. (Shai Erera) - + * LUCENE-2785: Added TotalHitCountCollector whose sole purpose is to count the number of hits matching the query. (Mike McCandless) -* LUCENE-2846: Deprecated IndexReader.setNorm(int, String, float). This method - is only syntactic sugar for setNorm(int, String, byte), but using the global - Similarity.getDefault().encodeNormValue(). Use the byte-based method instead +* LUCENE-2846: Deprecated IndexReader.setNorm(int, String, float). 
This method + is only syntactic sugar for setNorm(int, String, byte), but using the global + Similarity.getDefault().encodeNormValue(). Use the byte-based method instead to ensure that the norm is encoded with your Similarity. (Robert Muir, Mike McCandless) @@ -676,6 +783,9 @@ API Changes for AttributeImpls, but can still be provided (if needed). (Uwe Schindler) +* LUCENE-2691: Deprecate IndexWriter.getReader in favor of + IndexReader.open(IndexWriter) (Grant Ingersoll, Mike McCandless) + * LUCENE-2876: Deprecated Scorer.getSimilarity(). If your Scorer uses a Similarity, it should keep it itself. Fixed Scorers to pass their parent Weight, so that Scorer.visitSubScorers (LUCENE-2590) will work correctly. @@ -687,7 +797,7 @@ API Changes expert use cases can handle seeing deleted documents returned. The deletes remain buffered so that the next time you open an NRT reader and pass true, all deletes will be a applied. (Mike McCandless) - + * LUCENE-1253: LengthFilter (and Solr's KeepWordTokenFilter) now require up front specification of enablePositionIncrement. Together with StopFilter they have a common base class (FilteringTokenFilter) that handles @@ -698,7 +808,7 @@ Bug fixes * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on close. (Martin Traverso via Uwe Schindler) - + * LUCENE-2273: FieldCacheImpl.getCacheEntries() used WeakHashMap incorrectly and lead to ConcurrentModificationException. (Uwe Schindler, Robert Muir) @@ -709,7 +819,7 @@ Bug fixes * LUCENE-2074: Reduce buffer size of lexer back to default on reset. (Ruben Laguna, Shai Erera via Uwe Schindler) - + * LUCENE-2496: Don't throw NPE if IndexWriter is opened with CREATE on a prior (corrupt) index missing its segments_N file. (Mike McCandless) @@ -718,10 +828,10 @@ Bug fixes assuming whitespace tokenization. Previously all CJK queries, for example, would be turned into phrase queries. The old behavior is preserved with the matchVersion parameter for previous versions. Additionally, you can - explicitly enable the old behavior with setAutoGeneratePhraseQueries(true) + explicitly enable the old behavior with setAutoGeneratePhraseQueries(true) (Robert Muir) - -* LUCENE-2537: FSDirectory.copy() implementation was unsafe and could result in + +* LUCENE-2537: FSDirectory.copy() implementation was unsafe and could result in OOM if a large file was copied. (Shai Erera) * LUCENE-2580: MultiPhraseQuery throws AIOOBE if number of positions @@ -739,14 +849,14 @@ Bug fixes * LUCENE-2802: NRT DirectoryReader returned incorrect values from getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due - to a mutable reference to the IndexWriters SegmentInfos. + to a mutable reference to the IndexWriters SegmentInfos. (Simon Willnauer, Earwin Burrfoot) * LUCENE-2852: Fixed corner case in RAMInputStream that would hit a false EOF after seeking to EOF then seeking back to same block you were just in and then calling readBytes (Robert Muir, Mike McCandless) -* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it +* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it decides whether to return the cached computed size or not. (Shai Erera) * LUCENE-2584: SegmentInfo.files() could hit ConcurrentModificationException if @@ -759,7 +869,7 @@ Bug fixes internally, it now calls Similarity.idfExplain(Collection, IndexSearcher). (Robert Muir) -* LUCENE-2693: RAM used by IndexWriter was slightly incorrectly computed. 
+* LUCENE-2693: RAM used by IndexWriter was slightly incorrectly computed. (Jason Rutherglen via Shai Erera) * LUCENE-1846: DateTools now uses the US locale everywhere, so DateTools.round() @@ -775,6 +885,9 @@ Bug fixes been rounded down to 0 instead of being rounded up to the smallest positive number. (yonik) +* LUCENE-2936: PhraseQuery score explanations were not correctly + identifying matches vs non-matches. (hossman) + * LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if the underlying readByte() is inlined (which happens e.g. in MMapDirectory). The loop was unwinded which makes the hotspot bug disappear. @@ -783,30 +896,30 @@ Bug fixes New features * LUCENE-2128: Parallelized fetching document frequencies during weight - creation. (Israel Tsadok, Simon Willnauer via Uwe Schindler) + creation. (Israel Tsadok, Simon Willnauer via Uwe Schindler) * LUCENE-2069: Added Unicode 4 support to CharArraySet. Due to the switch to Java 5, supplementary characters are now lowercased correctly if the set is created as case insensitive. - CharArraySet now requires a Version argument to preserve - backwards compatibility. If Version < 3.1 is passed to the constructor, + CharArraySet now requires a Version argument to preserve + backwards compatibility. If Version < 3.1 is passed to the constructor, CharArraySet yields the old behavior. (Simon Willnauer) - + * LUCENE-2069: Added Unicode 4 support to LowerCaseFilter. Due to the switch to Java 5, supplementary characters are now lowercased correctly. - LowerCaseFilter now requires a Version argument to preserve - backwards compatibility. If Version < 3.1 is passed to the constructor, - LowerCaseFilter yields the old behavior. (Simon Willnauer, Robert Muir) + LowerCaseFilter now requires a Version argument to preserve + backwards compatibility. If Version < 3.1 is passed to the constructor, + LowerCaseFilter yields the old behavior. (Simon Willnauer, Robert Muir) * LUCENE-2034: Added ReusableAnalyzerBase, an abstract subclass of Analyzer that makes it easier to reuse TokenStreams correctly. This issue also added StopwordAnalyzerBase, which improves consistency of all Analyzers that use - stopwords, and implement many analyzers in contrib with it. + stopwords, and implement many analyzers in contrib with it. (Simon Willnauer via Robert Muir) - + * LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a new KeywordAttribute. (Simon Willnauer, Drew Farris via Uwe Schindler) - + * LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support to CharTokenizer and its subclasses. CharTokenizer now has new int-API which is conditionally preferred to the old char-API depending @@ -815,8 +928,8 @@ New features * LUCENE-2247: Added a CharArrayMap for performance improvements in some stemmers and synonym filters. (Uwe Schindler) - -* LUCENE-2320: Added SetOnce which wraps an object and allows it to be set + +* LUCENE-2320: Added SetOnce which wraps an object and allows it to be set exactly once. (Shai Erera via Mike McCandless) * LUCENE-2314: Added AttributeSource.copyTo(AttributeSource) that @@ -843,19 +956,19 @@ New features Directory.copyTo, and use nio's FileChannel.transferTo when copying files between FSDirectory instances. (Earwin Burrfoot via Mike McCandless). - + * LUCENE-2074: Make StandardTokenizer fit for Unicode 4.0, if the matchVersion parameter is Version.LUCENE_31. (Uwe Schindler) * LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. 
NoDeletionPolicy can be used to prevent commits from ever getting deleted from the index. (Shai Erera) - -* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can - return a DirPayloadProcessor for a given Directory, which returns a - PayloadProcessor for a given Term. The PayloadProcessor will be used to + +* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can + return a DirPayloadProcessor for a given Directory, which returns a + PayloadProcessor for a given Term. The PayloadProcessor will be used to process the payloads of the segments as they are merged (e.g. if one wants to - rewrite payloads of external indexes as they are added, or of local ones). + rewrite payloads of external indexes as they are added, or of local ones). (Shai Erera, Michael Busch, Mike McCandless) * LUCENE-2440: Add support for custom ExecutorService in @@ -868,7 +981,7 @@ New features * LUCENE-2526: Don't throw NPE from MultiPhraseQuery.toString when it's empty. (Ross Woolf via Mike McCandless) - + * LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike McCandless) @@ -884,17 +997,20 @@ New features to add span support: SpanMultiTermQueryWrapper. Using this wrapper its easy to add fuzzy/wildcard to e.g. a SpanNearQuery. (Robert Muir, Uwe Schindler) - + * LUCENE-2838: ConstantScoreQuery now directly supports wrapping a Query instance for stripping off scores. The use of a QueryWrapperFilter is no longer needed and discouraged for that use case. Directly wrapping Query improves performance, as out-of-order collection is now supported. (Uwe Schindler) -* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to +* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to FieldInvertState so that it can be used in Similarity.computeNorm. (Robert Muir) +* LUCENE-2720: Segments now record the code version which created them. + (Shai Erera, Mike McCandless, Uwe Schindler) + * LUCENE-2474: Added expert ReaderFinishedListener API to IndexReader, to allow apps that maintain external per-segment caches to evict entries when a segment is finished. (Shay Banon, Yonik @@ -903,8 +1019,8 @@ New features * LUCENE-2911: The new StandardTokenizer, UAX29URLEmailTokenizer, and the ICUTokenizer in contrib now all tag types with a consistent set of token types (defined in StandardTokenizer). Tokens in the major - CJK types are explicitly marked to allow for custom downstream handling: - , , , and . + CJK types are explicitly marked to allow for custom downstream handling: + , , , and . (Robert Muir, Steven Rowe) * LUCENE-2913: Add missing getters to Numeric* classes. (Uwe Schindler) @@ -929,7 +1045,7 @@ Optimizations * LUCENE-2137: Switch to AtomicInteger for some ref counting (Earwin Burrfoot via Mike McCandless) -* LUCENE-2123, LUCENE-2261: Move FuzzyQuery rewrite to separate RewriteMode +* LUCENE-2123, LUCENE-2261: Move FuzzyQuery rewrite to separate RewriteMode into MultiTermQuery. The number of fuzzy expansions can be specified with the maxExpansions parameter to FuzzyQuery. (Uwe Schindler, Robert Muir, Mike McCandless) @@ -963,12 +1079,12 @@ Optimizations TermAttributeImpl, move DEFAULT_TYPE constant to TypeInterface, improve null-handling for TypeAttribute. (Uwe Schindler) -* LUCENE-2329: Switch TermsHash* from using a PostingList object per unique +* LUCENE-2329: Switch TermsHash* from using a PostingList object per unique term to parallel arrays, indexed by termID. 
This reduces garbage collection overhead significantly, which results in great indexing performance wins when the available JVM heap space is low. This will become even more important when the DocumentsWriter RAM buffer is searchable in the future, - because then it will make sense to make the RAM buffers as large as + because then it will make sense to make the RAM buffers as large as possible. (Mike McCandless, Michael Busch) * LUCENE-2380: The terms field cache methods (getTerms, @@ -983,7 +1099,7 @@ Optimizations causing too many fallbacks to compare-by-value (instead of by-ord). (Mike McCandless) -* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for +* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for efficient copying by sub-classes. Optimized copy is implemented for RAM and FS streams. (Shai Erera) @@ -1006,15 +1122,15 @@ Optimizations * LUCENE-2010: Segments with 100% deleted documents are now removed on IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) - + * LUCENE-1472: Removed synchronization from static DateTools methods by using a ThreadLocal. Also converted DateTools.Resolution to a Java 5 enum (this should not break backwards). (Uwe Schindler) Build -* LUCENE-2124: Moved the JDK-based collation support from contrib/collation - into core, and moved the ICU-based collation support into contrib/icu. +* LUCENE-2124: Moved the JDK-based collation support from contrib/collation + into core, and moved the ICU-based collation support into contrib/icu. (Robert Muir) * LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards @@ -1026,14 +1142,14 @@ Build * LUCENE-1709: Tests are now parallelized by default (except for benchmark). You can force them to run sequentially by passing -Drunsequential=1 on the command - line. The number of threads that are spawned per CPU defaults to '1'. If you + line. The number of threads that are spawned per CPU defaults to '1'. If you wish to change that, you can run the tests with -DthreadsPerProcessor=[num]. (Robert Muir, Shai Erera, Peter Kofler) * LUCENE-2516: Backwards tests are now compiled against released lucene-core.jar from tarball of previous version. Backwards tests are now packaged together with src distribution. (Uwe Schindler) - + * LUCENE-2611: Added Ant target to install IntelliJ IDEA configuration: "ant idea". See http://wiki.apache.org/lucene-java/HowtoConfigureIntelliJ (Steven Rowe) @@ -1042,8 +1158,8 @@ Build generating Maven artifacts (Steven Rowe) * LUCENE-2609: Added jar-test-framework Ant target which packages Lucene's - tests' framework classes. (Drew Farris, Grant Ingersoll, Shai Erera, Steven - Rowe) + tests' framework classes. (Drew Farris, Grant Ingersoll, Shai Erera, + Steven Rowe) Test Cases @@ -1079,18 +1195,18 @@ Test Cases access to "real" files from the test folder itself, can use LuceneTestCase(J4).getDataFile(). (Uwe Schindler) -* LUCENE-2398, LUCENE-2611: Improve tests to work better from IDEs such +* LUCENE-2398, LUCENE-2611: Improve tests to work better from IDEs such as Eclipse and IntelliJ. (Paolo Castagna, Steven Rowe via Robert Muir) * LUCENE-2804: add newFSDirectory to LuceneTestCase to create a FSDirectory at random. (Shai Erera, Robert Muir) - + Documentation * LUCENE-2579: Fix oal.search's package.html description of abstract methods. (Santiago M. Mola via Mike McCandless) - + * LUCENE-2625: Add a note to IndexReader.termDocs() with additional verbiage that the TermEnum must be seeked since it is unpositioned. 
(Adriano Crestani via Robert Muir) diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 79cb6da939a..779b6309a44 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -356,3 +356,9 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing field as a parameter, this is removed due to the fact the entire Similarity (all methods) can now be configured per-field. Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider. + +* LUCENE-1076: TieredMergePolicy is now the default merge policy. + It's able to merge non-contiguous segments; this may cause problems + for applications that rely on Lucene's internal document ID + assigment. If so, you should instead use LogByteSize/DocMergePolicy + during indexing. diff --git a/lucene/build.xml b/lucene/build.xml index 4d245d595f2..3a0a522249a 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -152,6 +152,7 @@ DEPRECATED - Doing Nothing. See http://wiki.apache.org/lucene-java/HowToUpdateTheWebsite + @@ -194,6 +195,17 @@ + + + + + + + + + + + @@ -424,9 +436,12 @@ - + + + + diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 38327fec496..f8db3369b21 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -78,6 +78,7 @@ + @@ -102,6 +103,7 @@ + @@ -306,7 +308,7 @@ - + @@ -507,6 +509,8 @@ + + @@ -561,7 +565,7 @@ - + @@ -759,7 +763,8 @@ - + + @@ -769,6 +774,7 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 376a1ecafe1..46a60c87712 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -4,107 +4,138 @@ Lucene contrib change Log Build - * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo. - (Robert Muir) + * LUCENE-2845: Moved contrib/benchmark to modules. New Features - * LUCENE-2604: Added RegexpQuery support to contrib/queryparser. - (Simon Willnauer, Robert Muir) + * LUCENE-2604: Added RegexpQuery support to contrib/queryparser. + (Simon Willnauer, Robert Muir) - * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific - Directory impl that uses the O_DIRECT flag to bypass the buffer - cache. This is useful to prevent segment merging from evicting - pages from the buffer cache, since fadvise/madvise do not seem. - (Michael McCandless) + * LUCENE-2373: Added a Codec implementation that works with append-only + filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading + code is refactored to support append-only FS, and to allow for future + customization of per-segment information. (Andrzej Bialecki) - * LUCENE-2373: Added a Codec implementation that works with append-only - filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading - code is refactored to support append-only FS, and to allow for future - customization of per-segment information. (Andrzej Bialecki) + * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along + with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll) - * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along - with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll) - - * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker. The per class - method is also still available. (Grant Ingersoll) + * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker. 
The per class + method is also still available. (Grant Ingersoll) - * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly - from the term dictionary using levenshtein automata. (Robert Muir) + * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly + from the term dictionary using levenshtein automata. (Robert Muir) - * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl - that doesn't synchronize on the file handle. This can be useful to - avoid the performance problems of SimpleFSDirectory and NIOFSDirectory. - (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless) + * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries + using the FieldCache's TermsEnum. (Robert Muir) API Changes - * LUCENE-2606: Changed RegexCapabilities interface to fix thread - safety, serialization, and performance problems. If you have - written a custom RegexCapabilities it will need to be updated - to the new API. (Robert Muir, Uwe Schindler) + * LUCENE-2606: Changed RegexCapabilities interface to fix thread + safety, serialization, and performance problems. If you have + written a custom RegexCapabilities it will need to be updated + to the new API. (Robert Muir, Uwe Schindler) - * LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful - for API use. (Andrzej Bialecki) + * LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful + for API use. (Andrzej Bialecki) * LUCENE-2912: The field-specific hashmaps in SweetSpotSimilarity were removed. Instead, use SimilarityProvider to return different SweetSpotSimilaritys for different fields, this way all parameters (such as TF factors) can be customized on a per-field basis. (Robert Muir) + +Bug Fixes + + * LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was + not lowercasing the key before checking for the tag (Adriano Crestani) ======================= Lucene 3.x (not yet released) ======================= -(No changes) +Bug Fixes -======================= Lucene 3.1 (not yet released) ======================= + * LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was + not lowercasing the key before checking for the tag (Adriano Crestani) + + * LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException + on sentences longer than 32,767 characters. (wangzhenghang via Robert Muir) + + * LUCENE-2939: Highlighter should try and use maxDocCharsToAnalyze in + WeightedSpanTermExtractor when adding a new field to MemoryIndex as well as + when using CachingTokenStream. This can be a significant performance bug for + large documents. (Mark Miller) + + * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered + a zero-length token. (Robert Muir) + + * LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly, this only + caused a problem if you consumed a tokenstream, then reused it, added different + attributes to it, and consumed it again. (Robert Muir, Uwe Schindler) + +New Features + + * LUCENE-3016: Add analyzer for Latvian. (Robert Muir) + +======================= Lucene 3.1.0 ======================= Changes in backwards compatibility policy * LUCENE-2100: All Analyzers in Lucene-contrib have been marked as final. Analyzers should be only act as a composition of TokenStreams, users should compose their own analyzers instead of subclassing existing ones. 
- (Simon Willnauer) + (Simon Willnauer) * LUCENE-2194, LUCENE-2201: Snowball APIs were upgraded to snowball revision - 502 (with some local modifications for improved performance). - Index backwards compatibility and binary backwards compatibility is - preserved, but some protected/public member variables changed type. This - does NOT affect java code/class files produced by the snowball compiler, + 502 (with some local modifications for improved performance). + Index backwards compatibility and binary backwards compatibility is + preserved, but some protected/public member variables changed type. This + does NOT affect java code/class files produced by the snowball compiler, but technically is a backwards compatibility break. (Robert Muir) - + * LUCENE-2226: Moved contrib/snowball functionality into contrib/analyzers. Be sure to remove any old obselete lucene-snowball jar files from your classpath! (Robert Muir) - + * LUCENE-2323: Moved contrib/wikipedia functionality into contrib/analyzers. Additionally the package was changed from org.apache.lucene.wikipedia.analysis to org.apache.lucene.analysis.wikipedia. (Robert Muir) * LUCENE-2581: Added new methods to FragmentsBuilder interface. These methods are used to set pre/post tags and Encoder. (Koji Sekiguchi) - + + * LUCENE-2391: Improved spellchecker (re)build time/ram usage by omitting + frequencies/positions/norms for single-valued fields, modifying the default + ramBufferMBSize to match IndexWriterConfig (16MB), making index optimization + an optional boolean parameter, and modifying the incremental update logic + to work well with unoptimized spellcheck indexes. The indexDictionary() methods + were made final to ensure a hard backwards break in case you were subclassing + Spellchecker. In general, subclassing Spellchecker is not recommended. (Robert Muir) + Changes in runtime behavior * LUCENE-2117: SnowballAnalyzer uses TurkishLowerCaseFilter instead of LowercaseFilter to correctly handle the unique Turkish casing behavior if used with Version > 3.0 and the TurkishStemmer. - (Robert Muir via Simon Willnauer) + (Robert Muir via Simon Willnauer) - * LUCENE-2055: GermanAnalyzer now uses the Snowball German2 algorithm and + * LUCENE-2055: GermanAnalyzer now uses the Snowball German2 algorithm and stopwords list by default for Version > 3.0. (Robert Muir, Uwe Schindler, Simon Willnauer) Bug fixes + * LUCENE-2855: contrib queryparser was using CharSequence as key in some internal + Map instances, which was leading to incorrect behavior, since some CharSequence + implementors do not override hashcode and equals methods. Now the internal Maps + are using String instead. (Adriano Crestani) + * LUCENE-2068: Fixed ReverseStringFilter which was not aware of supplementary characters. During reverse the filter created unpaired surrogates, which will be replaced by U+FFFD by the indexer, but not at query time. The filter now reverses supplementary characters correctly if used with Version > 3.0. (Simon Willnauer, Robert Muir) - * LUCENE-2035: TokenSources.getTokenStream() does not assign positionIncrement. + * LUCENE-2035: TokenSources.getTokenStream() does not assign positionIncrement. (Christopher Morris via Mark Miller) - + * LUCENE-2055: Deprecated RussianTokenizer, RussianStemmer, RussianStemFilter, FrenchStemmer, FrenchStemFilter, DutchStemmer, and DutchStemFilter. For these Analyzers, SnowballFilter is used instead (for Version > 3.0), as @@ -113,48 +144,55 @@ Bug fixes default. 
(Robert Muir, Uwe Schindler, Simon Willnauer) * LUCENE-2184: Fixed bug with handling best fit value when the proper best fit value is - not an indexed field. Note, this change affects the APIs. (Grant Ingersoll) - + not an indexed field. Note, this change affects the APIs. (Grant Ingersoll) + * LUCENE-2359: Fix bug in CartesianPolyFilterBuilder related to handling of behavior around - the 180th meridian (Grant Ingersoll) + the 180th meridian (Grant Ingersoll) * LUCENE-2404: Fix bugs with position increment and empty tokens in ThaiWordFilter. For matchVersion >= 3.1 the filter also no longer lowercases. ThaiAnalyzer will use a separate LowerCaseFilter instead. (Uwe Schindler, Robert Muir) -* LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus - permissions to newly created files, and to not silently hardwire - buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless) + * LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus + permissions to newly created files, and to not silently hardwire + buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless) -* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows - you to customize its normalization/folding, by editing the source data files in src/data - and regenerating a new .nrm with 'ant gennorm2'. (David Bowen via Robert Muir) + * LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows + you to customize its normalization/folding, by editing the source data files in src/data + and regenerating a new .nrm with 'ant gennorm2'. (David Bowen via Robert Muir) -* LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not - always the case. If the dictionary is unavailable, the filter will now throw - UnsupportedOperationException in the constructor. (Robert Muir) + * LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not + always the case. If the dictionary is unavailable, the filter will now throw + UnsupportedOperationException in the constructor. (Robert Muir) -* LUCENE-589: Fix contrib/demo for international documents. - (Curtis d'Entremont via Robert Muir) - -* LUCENE-2246: Fix contrib/demo for Turkish html documents. - (Selim Nadi via Robert Muir) - -* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading - (Curtis d'Entremont via Robert Muir) + * LUCENE-589: Fix contrib/demo for international documents. + (Curtis d'Entremont via Robert Muir) -* LUCENE-591: The demo indexer now indexes meta keywords. - (Curtis d'Entremont via Robert Muir) + * LUCENE-2246: Fix contrib/demo for Turkish html documents. + (Selim Nadi via Robert Muir) - * LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter. + * LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading + (Curtis d'Entremont via Robert Muir) + + * LUCENE-591: The demo indexer now indexes meta keywords. + (Curtis d'Entremont via Robert Muir) + + * LUCENE-2874: Highlighting overlapping tokens outputted doubled words. + (Pierre Gossé via Robert Muir) + + * LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter. (Robert Muir) - + API Changes + * LUCENE-2867: Some contrib queryparser methods that receives CharSequence as + identifier, such as QueryNode#unsetTag(CharSequence), were deprecated and + will be removed on version 4. (Adriano Crestani) + * LUCENE-2147: Spatial GeoHashUtils now always decode GeoHash strings with full precision. 
GeoHash#decode_exactly(String) was merged into GeoHash#decode(String). (Chris Male, Simon Willnauer) - + * LUCENE-2204: Change some package private classes/members to publicly accessible to implement custom FragmentsBuilders. (Koji Sekiguchi) @@ -171,14 +209,14 @@ API Changes * LUCENE-2626: FastVectorHighlighter: enable FragListBuilder and FragmentsBuilder to be set per-field override. (Koji Sekiguchi) - * LUCENE-2712: FieldBoostMapAttribute in contrib/queryparser was changed from + * LUCENE-2712: FieldBoostMapAttribute in contrib/queryparser was changed from a Map<CharSequence,Float> to a Map<String,Float>. Per the CharSequence javadoc, CharSequence is inappropriate as a map key. (Robert Muir) * LUCENE-1937: Add more methods to manipulate QueryNodeProcessorPipeline elements. QueryNodeProcessorPipeline now implements the List interface, this is useful if you want to extend or modify an existing pipeline. (Adriano Crestani via Robert Muir) - + * LUCENE-2754, LUCENE-2757: Deprecated SpanRegexQuery. Use new SpanMultiTermQueryWrapper(new RegexQuery()) instead. (Robert Muir, Uwe Schindler) @@ -186,18 +224,27 @@ API Changes * LUCENE-2747: Deprecated ArabicLetterTokenizer. StandardTokenizer now tokenizes most languages correctly including Arabic. (Steven Rowe, Robert Muir) + * LUCENE-2830: Use StringBuilder instead of StringBuffer across Benchmark, and + remove the StringBuffer HtmlParser.parse() variant. (Shai Erera) + * LUCENE-2920: Deprecated ShingleMatrixFilter as it is unmaintained and does not work with custom Attributes or custom payload encoders. (Uwe Schindler) - + New features + * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific + Directory impl that uses the O_DIRECT flag to bypass the buffer + cache. This is useful to prevent segment merging from evicting + pages from the buffer cache, since fadvise/madvise do not seem to work. + (Michael McCandless) + * LUCENE-2306: Add NumericRangeFilter and NumericRangeQuery support to XMLQueryParser. (Jingkei Ly, via Mark Harwood) * LUCENE-2102: Add a Turkish LowerCase Filter. TurkishLowerCaseFilter handles Turkish and Azeri unique casing behavior correctly. (Ahmet Arslan, Robert Muir via Simon Willnauer) - + * LUCENE-2039: Add an extensible query parser to contrib/misc. ExtendableQueryParser enables arbitrary parser extensions based on a customizable field naming scheme. @@ -205,11 +252,11 @@ New features * LUCENE-2067: Add a Czech light stemmer. CzechAnalyzer will now stem words when Version is set to 3.1 or higher. (Robert Muir) - + * LUCENE-2062: Add a Bulgarian analyzer. (Robert Muir, Simon Willnauer) * LUCENE-2206: Add Snowball's stopword lists for Danish, Dutch, English, - Finnish, French, German, Hungarian, Italian, Norwegian, Russian, Spanish, + Finnish, French, German, Hungarian, Italian, Norwegian, Russian, Spanish, and Swedish. These can be loaded with WordListLoader.getSnowballWordSet. (Robert Muir, Simon Willnauer) @@ -217,7 +264,7 @@ New features (Koji Sekiguchi) * LUCENE-2218: ShingleFilter supports minimum shingle size, and the separator - character is now configurable. It's also up to 20% faster. + character is now configurable. It's also up to 20% faster. (Steven Rowe via Robert Muir) * LUCENE-2234: Add a Hindi analyzer. (Robert Muir) @@ -247,7 +294,7 @@ New features * LUCENE-2298: Add analyzers/stempel, an algorithmic stemmer with support for the Polish language.
(Andrzej Bialecki via Robert Muir) - * LUCENE-2400: ShingleFilter was changed to not output all-filler shingles and + * LUCENE-2400: ShingleFilter was changed to not output all-filler shingles and unigrams, and uses a more performant algorithm to build grams using a linked list of AttributeSource.cloneAttributes() instances and the new copyTo() method. (Steven Rowe via Uwe Schindler) @@ -266,7 +313,7 @@ New features * LUCENE-2464: FastVectorHighlighter: add SingleFragListBuilder to return entire field contents. (Koji Sekiguchi) - * LUCENE-2503: Added lighter stemming alternatives for European languages. + * LUCENE-2503: Added lighter stemming alternatives for European languages. (Robert Muir) * LUCENE-2581: FastVectorHighlighter: add Encoder to FragmentsBuilder. @@ -274,12 +321,23 @@ New features * LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball. (Robert Muir) - + * LUCENE-1938: PrecedenceQueryParser is now implemented with the flexible QP framework. This means that you can also add this functionality to your own QP pipeline by using BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor. (Adriano Crestani via Robert Muir) + * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl + that doesn't synchronize on the file handle. This can be useful to + avoid the performance problems of SimpleFSDirectory and NIOFSDirectory. + (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless) + + * LUCENE-2842: Add analyzer for Galician. Also adds the RSLP (Orengo) stemmer + for Portuguese. (Robert Muir) + + * SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of + /something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi) + Build * LUCENE-2124: Moved the JDK-based collation support from contrib/collation @@ -299,7 +357,12 @@ Build * LUCENE-2797: Upgrade contrib/icu's ICU jar file to ICU 4.6 (Robert Muir) - + + * LUCENE-2833: Upgrade contrib/ant's jtidy jar file to r938 (Robert Muir) + + * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo. + (Robert Muir) + Optimizations * LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer diff --git a/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java b/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java index b22638c713a..9e1c7480df5 100644 --- a/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java +++ b/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java @@ -39,7 +39,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; @@ -285,9 +285,9 @@ public class IndexTask extends Task { IndexWriterConfig conf = new IndexWriterConfig( Version.LUCENE_CURRENT, analyzer).setOpenMode( create ?
OpenMode.CREATE : OpenMode.APPEND); - LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundIndex); - lmp.setMergeFactor(mergeFactor); + TieredMergePolicy tmp = (TieredMergePolicy) conf.getMergePolicy(); + tmp.setUseCompoundFile(useCompoundIndex); + tmp.setMaxMergeAtOnce(mergeFactor); IndexWriter writer = new IndexWriter(dir, conf); int totalFiles = 0; int totalIndexed = 0; diff --git a/lucene/contrib/db/bdb-je/lib/je-3.3.93.jar b/lucene/contrib/db/bdb-je/lib/je-3.3.93.jar new file mode 100644 index 00000000000..4ceafc9209a --- /dev/null +++ b/lucene/contrib/db/bdb-je/lib/je-3.3.93.jar @@ -0,0 +1,2 @@ +AnyObjectId[9a9ff077cdd36a96e7e0506986edd4e52b90a22f] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/lucene/contrib/db/bdb-je/lib/je-LICENSE-FAKE.txt b/lucene/contrib/db/bdb-je/lib/je-LICENSE-FAKE.txt new file mode 100644 index 00000000000..a1defaa3da4 --- /dev/null +++ b/lucene/contrib/db/bdb-je/lib/je-LICENSE-FAKE.txt @@ -0,0 +1 @@ +No bdb jars are shipped with lucene. This is a fake license to work around the automated license checking. diff --git a/lucene/contrib/db/bdb-je/lib/je-NOTICE-FAKE.txt b/lucene/contrib/db/bdb-je/lib/je-NOTICE-FAKE.txt new file mode 100644 index 00000000000..a1defaa3da4 --- /dev/null +++ b/lucene/contrib/db/bdb-je/lib/je-NOTICE-FAKE.txt @@ -0,0 +1 @@ +No bdb jars are shipped with lucene. This is a fake license to work around the automated license checking. diff --git a/lucene/contrib/db/bdb/lib/db--NOTICE-FAKE.txt b/lucene/contrib/db/bdb/lib/db--NOTICE-FAKE.txt new file mode 100644 index 00000000000..a1defaa3da4 --- /dev/null +++ b/lucene/contrib/db/bdb/lib/db--NOTICE-FAKE.txt @@ -0,0 +1 @@ +No bdb jars are shipped with lucene. This is a fake license to work around the automated license checking. diff --git a/lucene/contrib/db/bdb/lib/db-4.7.25.jar b/lucene/contrib/db/bdb/lib/db-4.7.25.jar new file mode 100644 index 00000000000..fedd3e2adf2 --- /dev/null +++ b/lucene/contrib/db/bdb/lib/db-4.7.25.jar @@ -0,0 +1,2 @@ +AnyObjectId[99baf20bacd712cae91dd6e4e1f46224cafa1a37] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/lucene/contrib/db/bdb/lib/db-LICENSE-FAKE.txt b/lucene/contrib/db/bdb/lib/db-LICENSE-FAKE.txt new file mode 100644 index 00000000000..a1defaa3da4 --- /dev/null +++ b/lucene/contrib/db/bdb/lib/db-LICENSE-FAKE.txt @@ -0,0 +1 @@ +No bdb jars are shipped with lucene. This is a fake license to work around the automated license checking. 
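The IndexTask change above swaps LogMergePolicy for TieredMergePolicy: the compound-file flag carries over unchanged, while the old mergeFactor setting maps onto setMaxMergeAtOnce. A minimal sketch of the new configuration, using only classes and setters that appear in this patch and assuming (as IndexTask's cast does) that the default merge policy on this branch is TieredMergePolicy; the directory path and analyzer are placeholders:

    import java.io.File;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class TieredMergePolicyExample {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/example-index")); // placeholder path
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
            new StandardAnalyzer(Version.LUCENE_CURRENT));
        // Mirrors IndexTask after this patch: cast the default policy and configure it.
        TieredMergePolicy tmp = (TieredMergePolicy) conf.getMergePolicy();
        tmp.setUseCompoundFile(true);  // was lmp.setUseCompoundFile(useCompoundIndex)
        tmp.setMaxMergeAtOnce(10);     // rough replacement for lmp.setMergeFactor(mergeFactor)
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.close();
      }
    }

Note that setMaxMergeAtOnce only controls how many segments are merged at once; unlike mergeFactor it does not also drive how many segments accumulate per tier, which TieredMergePolicy exposes as a separate setting.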
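The LUCENE-2855 and LUCENE-2712 entries above come down to the same pitfall: CharSequence does not specify equals/hashCode behavior, so implementations that inherit Object's identity semantics (StringBuilder, for example) make unreliable hash-map keys. The following is a small self-contained illustration of the underlying Java behavior, not Lucene code:

    import java.util.HashMap;
    import java.util.Map;

    public class CharSequenceKeyPitfall {
      public static void main(String[] args) {
        Map<CharSequence, String> byCharSequence = new HashMap<CharSequence, String>();
        byCharSequence.put(new StringBuilder("boost"), "1.5");

        // StringBuilder inherits Object.equals/hashCode, so an equal-looking key misses:
        System.out.println(byCharSequence.get(new StringBuilder("boost"))); // null
        System.out.println(byCharSequence.get("boost"));                    // null as well

        // Keying on String, as the contrib queryparser now does internally, behaves as expected:
        Map<String, String> byString = new HashMap<String, String>();
        byString.put("boost", "1.5");
        System.out.println(byString.get("boost"));                          // 1.5
      }
    }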
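The highlighter changes below wire Highlighter's existing maxDocCharsToAnalyze setting through QueryScorer and WeightedSpanTermExtractor into the new OffsetLimitTokenFilter, so the token stream analyzed for span extraction stops once the configured character offset is reached. A rough usage sketch built from calls visible in this patch, assuming the package names of this source tree; the query string, analyzer, and text are placeholders:

    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
    import org.apache.lucene.util.Version;

    public class HighlightWithCharLimit {
      public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        Query query = new QueryParser(Version.LUCENE_CURRENT, "text", analyzer).parse("lucene");

        QueryScorer scorer = new QueryScorer(query, "text");
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
        // After this patch the limit below also reaches QueryScorer, which wraps the
        // stream it analyzes in OffsetLimitTokenFilter.
        highlighter.setMaxDocCharsToAnalyze(100);

        String text = "a long stored field value that mentions lucene somewhere near the start";
        TokenStream tokens = analyzer.tokenStream("text", new StringReader(text));
        System.out.println(highlighter.getBestFragment(tokens, text));
      }
    }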
diff --git a/lucene/contrib/demo/src/test/org/apache/lucene/demo/TestDemo.java b/lucene/contrib/demo/src/test/org/apache/lucene/demo/TestDemo.java index 4457ef7aae3..d2bd59d0963 100644 --- a/lucene/contrib/demo/src/test/org/apache/lucene/demo/TestDemo.java +++ b/lucene/contrib/demo/src/test/org/apache/lucene/demo/TestDemo.java @@ -22,16 +22,17 @@ import java.io.File; import java.io.PrintStream; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestDemo extends LuceneTestCase { - private void testOneSearch(String query, int expectedHitCount) throws Exception { + private void testOneSearch(File indexPath, String query, int expectedHitCount) throws Exception { PrintStream outSave = System.out; try { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); PrintStream fakeSystemOut = new PrintStream(bytes); System.setOut(fakeSystemOut); - SearchFiles.main(new String[] {"-query", query}); + SearchFiles.main(new String[] {"-query", query, "-index", indexPath.getPath()}); fakeSystemOut.flush(); String output = bytes.toString(); // intentionally use default encoding assertTrue("output=" + output, output.contains(expectedHitCount + " total matching documents")); @@ -42,12 +43,13 @@ public class TestDemo extends LuceneTestCase { public void testIndexSearch() throws Exception { File dir = getDataFile("test-files/docs"); - IndexFiles.main(new String[] { "-create", "-docs", dir.getPath() }); - testOneSearch("apache", 3); - testOneSearch("patent", 8); - testOneSearch("lucene", 0); - testOneSearch("gnu", 6); - testOneSearch("derivative", 8); - testOneSearch("license", 13); + File indexDir = _TestUtil.getTempDir("ContribDemoTest"); + IndexFiles.main(new String[] { "-create", "-docs", dir.getPath(), "-index", indexDir.getPath()}); + testOneSearch(indexDir, "apache", 3); + testOneSearch(indexDir, "patent", 8); + testOneSearch(indexDir, "lucene", 0); + testOneSearch(indexDir, "gnu", 6); + testOneSearch(indexDir, "derivative", 8); + testOneSearch(indexDir, "license", 13); } } diff --git a/lucene/contrib/highlighter/.cvsignore b/lucene/contrib/highlighter/.cvsignore deleted file mode 100644 index 9d0b71a3c79..00000000000 --- a/lucene/contrib/highlighter/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -build -dist diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java index 5deafd62faa..2c2104570e4 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java @@ -197,6 +197,11 @@ public class Highlighter tokenStream.reset(); TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size()); + + if (fragmentScorer instanceof QueryScorer) { + ((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(maxDocCharsToAnalyze); + } + TokenStream newStream = fragmentScorer.init(tokenStream); if(newStream != null) { tokenStream = newStream; diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java new file mode 100644 index 00000000000..2102c28d894 --- /dev/null +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java @@ -0,0 +1,57 @@ +package org.apache.lucene.search.highlight; + +/** + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + +/** + * This TokenFilter limits the number of tokens while indexing by adding up the + * current offset. + */ +public final class OffsetLimitTokenFilter extends TokenFilter { + + private int offsetCount; + private OffsetAttribute offsetAttrib = getAttribute(OffsetAttribute.class); + private int offsetLimit; + + public OffsetLimitTokenFilter(TokenStream input, int offsetLimit) { + super(input); + this.offsetLimit = offsetLimit; + } + + @Override + public boolean incrementToken() throws IOException { + if (offsetCount < offsetLimit && input.incrementToken()) { + int offsetLength = offsetAttrib.endOffset() - offsetAttrib.startOffset(); + offsetCount += offsetLength; + return true; + } + return false; + } + + @Override + public void reset() throws IOException { + super.reset(); + offsetCount = 0; + } + +} diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java index e0b76a4aebd..706fb89151e 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java @@ -54,6 +54,7 @@ public class QueryScorer implements Scorer { private IndexReader reader; private boolean skipInitExtractor; private boolean wrapToCaching = true; + private int maxCharsToAnalyze; /** * @param query Query to use for highlighting @@ -209,7 +210,7 @@ public class QueryScorer implements Scorer { private TokenStream initExtractor(TokenStream tokenStream) throws IOException { WeightedSpanTermExtractor qse = defaultField == null ? 
new WeightedSpanTermExtractor() : new WeightedSpanTermExtractor(defaultField); - + qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze); qse.setExpandMultiTermQuery(expandMultiTermQuery); qse.setWrapIfNotCachingTokenFilter(wrapToCaching); if (reader == null) { @@ -265,4 +266,8 @@ public class QueryScorer implements Scorer { public void setWrapIfNotCachingTokenFilter(boolean wrap) { this.wrapToCaching = wrap; } + + public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) { + this.maxCharsToAnalyze = maxDocCharsToAnalyze; + } } diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 471c29ee070..4d5990d6dab 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -56,6 +56,7 @@ public class WeightedSpanTermExtractor { private boolean expandMultiTermQuery; private boolean cachedTokenStream; private boolean wrapToCaching = true; + private int maxDocCharsToAnalyze; public WeightedSpanTermExtractor() { } @@ -320,13 +321,13 @@ public class WeightedSpanTermExtractor { private AtomicReaderContext getLeafContextForField(String field) throws IOException { if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) { - tokenStream = new CachingTokenFilter(tokenStream); + tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); cachedTokenStream = true; } AtomicReaderContext context = readers.get(field); if (context == null) { MemoryIndex indexer = new MemoryIndex(); - indexer.addField(field, tokenStream); + indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); tokenStream.reset(); IndexSearcher searcher = indexer.createSearcher(); // MEM index has only atomic ctx @@ -545,4 +546,8 @@ public class WeightedSpanTermExtractor { public void setWrapIfNotCachingTokenFilter(boolean wrap) { this.wrapToCaching = wrap; } + + protected final void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) { + this.maxDocCharsToAnalyze = maxDocCharsToAnalyze; + } } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java index 755d9f5d4ec..6687727a4a6 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java @@ -58,7 +58,7 @@ public class HighlighterPhraseTest extends LuceneTestCase { final String TEXT = "the fox jumped"; final Directory directory = newDirectory(); final IndexWriter indexWriter = new IndexWriter(directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); try { final Document document = new Document(); document.add(new Field(FIELD, new TokenStreamConcurrent(), @@ -102,7 +102,7 @@ public class HighlighterPhraseTest extends LuceneTestCase { final String TEXT = "the fox jumped"; final Directory directory = newDirectory(); final IndexWriter indexWriter = new IndexWriter(directory, - 
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); try { final Document document = new Document(); document.add(new Field(FIELD, new TokenStreamConcurrent(), @@ -172,7 +172,7 @@ public class HighlighterPhraseTest extends LuceneTestCase { final String TEXT = "the fox did not jump"; final Directory directory = newDirectory(); final IndexWriter indexWriter = new IndexWriter(directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); try { final Document document = new Document(); document.add(new Field(FIELD, new TokenStreamSparse(), @@ -215,7 +215,7 @@ public class HighlighterPhraseTest extends LuceneTestCase { final String TEXT = "the fox did not jump"; final Directory directory = newDirectory(); final IndexWriter indexWriter = new IndexWriter(directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); try { final Document document = new Document(); document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED, @@ -256,7 +256,7 @@ public class HighlighterPhraseTest extends LuceneTestCase { final String TEXT = "the fox did not jump"; final Directory directory = newDirectory(); final IndexWriter indexWriter = new IndexWriter(directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); try { final Document document = new Document(); document.add(new Field(FIELD, new TokenStreamSparse(), diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 7c99c5d5a57..cea67428617 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -90,7 +90,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte Directory ramDir; public IndexSearcher searcher = null; int numHighlights = 0; - final Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + final Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); TopDocs hits; String[] texts = { @@ -101,7 +101,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" }; public void testQueryScorerHits() throws Exception { - Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true); + Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer); query = qp.parse("\"very long\""); searcher = new IndexSearcher(ramDir, true); @@ -133,7 +133,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte String s1 = "I call our world Flatland, not because we call it so,"; 
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); // Verify that a query against the default field results in text being // highlighted @@ -165,7 +165,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte */ private static String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { - TokenStream tokenStream = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text)); + TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text)); // Assuming "", "" used to highlight SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME); @@ -210,7 +210,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte String f2c = f2 + ":"; String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2 + " OR " + f2c + ph2 + ")"; - Analyzer analyzer = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer); Query query = qp.parse(q); @@ -1134,13 +1134,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte sb.append("stoppedtoken"); } SimpleHTMLFormatter fm = new SimpleHTMLFormatter(); - Highlighter hg = getHighlighter(query, "data", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true).tokenStream( + Highlighter hg = getHighlighter(query, "data", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream( "data", new StringReader(sb.toString())), fm);// new Highlighter(fm, // new // QueryTermScorer(query)); hg.setTextFragmenter(new NullFragmenter()); hg.setMaxDocCharsToAnalyze(100); - match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString()); + match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString()); assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg .getMaxDocCharsToAnalyze()); @@ -1151,7 +1151,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // + whitespace) sb.append(" "); sb.append(goodWord); - match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString()); + match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString()); assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg .getMaxDocCharsToAnalyze()); } @@ -1170,10 +1170,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte String text = "this is a text with searchterm in it"; SimpleHTMLFormatter fm = new SimpleHTMLFormatter(); - Highlighter hg = getHighlighter(query, "text", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true).tokenStream("text", new StringReader(text)), fm); + Highlighter hg = getHighlighter(query, "text", new 
MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream("text", new StringReader(text)), fm); hg.setTextFragmenter(new NullFragmenter()); hg.setMaxDocCharsToAnalyze(36); - String match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "text", text); + String match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "text", text); assertTrue( "Matched text should contain remainder of text after highlighted query ", match.endsWith("in it")); @@ -1191,7 +1191,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // test to show how rewritten query can still be used if (searcher != null) searcher.close(); searcher = new IndexSearcher(ramDir, true); - Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer); Query query = parser.parse("JF? or Kenned*"); @@ -1446,64 +1446,64 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte Highlighter highlighter; String result; - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("foo"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("10"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("speed"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hispeed"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi speed"); 
+ query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed"); highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); // ///////////////// same tests, just put the bigger overlapping token // first - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("foo"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("10"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("speed"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hispeed"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); - query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi speed"); + query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed"); highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this); result = highlighter.getBestFragments(getTS2a(), s, 3, "..."); assertEquals("Hi-Speed10 foo", result); @@ -1514,7 +1514,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } private Directory dir; - private Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + private Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException { makeIndex(); @@ -1529,7 +1529,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } private void makeIndex() throws IOException { 
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) ); writer.addDocument( doc( "t_text1", "more random words for second field del" ) ); writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) ); @@ -1539,7 +1539,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } private void deleteDocument() throws IOException { - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND)); writer.deleteDocuments( new Term( "t_text1", "del" ) ); // To see negative idf, keep comment the following line //writer.optimize(); @@ -1644,7 +1644,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte dir = newDirectory(); ramDir = newDirectory(); IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); for (String text : texts) { addDoc(writer, text); } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java new file mode 100644 index 00000000000..45aa3f51425 --- /dev/null +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java @@ -0,0 +1,60 @@ +package org.apache.lucene.search.highlight; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase { + + public void testFilter() throws Exception { + TokenStream stream = new MockTokenizer(new StringReader( + "short toolong evenmuchlongertext a ab toolong foo"), + MockTokenizer.WHITESPACE, false); + OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10); + assertTokenStreamContents(filter, new String[] {"short", "toolong"}); + + stream = new MockTokenizer(new StringReader( + "short toolong evenmuchlongertext a ab toolong foo"), + MockTokenizer.WHITESPACE, false); + filter = new OffsetLimitTokenFilter(stream, 12); + assertTokenStreamContents(filter, new String[] {"short", "toolong"}); + + stream = new MockTokenizer(new StringReader( + "short toolong evenmuchlongertext a ab toolong foo"), + MockTokenizer.WHITESPACE, false); + filter = new OffsetLimitTokenFilter(stream, 30); + assertTokenStreamContents(filter, new String[] {"short", "toolong", + "evenmuchlongertext"}); + + + checkOneTermReuse(new Analyzer() { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new OffsetLimitTokenFilter(new MockTokenizer(reader, + MockTokenizer.WHITESPACE, false), 10); + } + }, "llenges", "llenges"); + } +} \ No newline at end of file diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java index 85f95097c95..0f19ebfd459 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java @@ -87,9 +87,9 @@ public abstract class AbstractTestCase extends LuceneTestCase { @Override public void setUp() throws Exception { super.setUp(); - analyzerW = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); analyzerB = new BigramAnalyzer(); - analyzerK = new MockAnalyzer(MockTokenizer.KEYWORD, false); + analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false); paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW ); paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB ); dir = newDirectory(); diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java index 742e101ce93..656a5a48ed8 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java @@ -32,8 +32,7 @@ import java.util.Comparator; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.*; -import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; @@ -391,11 +390,6 @@ public 
class InstantiatedIndexReader extends IndexReader { public TermsEnum terms() { return new InstantiatedTermsEnum(orderedTerms, upto, currentField); } - - @Override - public DocValues docValues() throws IOException { - return null; - } }; } @@ -439,11 +433,6 @@ public class InstantiatedIndexReader extends IndexReader { } }; } - - @Override - public DocValues docValues(String field) throws IOException { - return null; - } }; } @@ -498,4 +487,9 @@ public class InstantiatedIndexReader extends IndexReader { } } } + + @Override + public PerDocValues perDocValues() throws IOException { + return null; + } } diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java index f513a0bb423..40811908d2c 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java @@ -59,7 +59,7 @@ public class TestEmptyIndex extends LuceneTestCase { // make sure a Directory acts the same Directory d = newDirectory(); - new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).close(); + new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); r = IndexReader.open(d, false); testNorms(r); r.close(); @@ -84,7 +84,7 @@ public class TestEmptyIndex extends LuceneTestCase { // make sure a Directory acts the same Directory d = newDirectory(); - new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).close(); + new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); r = IndexReader.open(d, false); termsEnumTest(r); r.close(); diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java index 7a5398c4ed0..4048d1c59a8 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.Random; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; @@ -65,7 +66,7 @@ public class TestIndicesEquals extends LuceneTestCase { // create dir data IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < 20; i++) { Document document = new Document(); @@ -88,10 +89,13 @@ public class TestIndicesEquals extends LuceneTestCase { Directory dir = newDirectory(); InstantiatedIndex ii = new InstantiatedIndex(); - + + // we need to pass the "same" random to both, so they surely index the same payload data. + long seed = random.nextLong(); + // create dir data IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))).setMergePolicy(newLogMergePolicy())); indexWriter.setInfoStream(VERBOSE ? 
System.out : null); if (VERBOSE) { System.out.println("TEST: make test index"); @@ -104,7 +108,7 @@ public class TestIndicesEquals extends LuceneTestCase { indexWriter.close(); // test ii writer - InstantiatedIndexWriter instantiatedIndexWriter = ii.indexWriterFactory(new MockAnalyzer(), true); + InstantiatedIndexWriter instantiatedIndexWriter = ii.indexWriterFactory(new MockAnalyzer(new Random(seed)), true); for (int i = 0; i < 500; i++) { Document document = new Document(); assembleDocument(document, i); diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java index 413d7f56fae..43b11cc0100 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java @@ -36,7 +36,7 @@ public class TestRealTime extends LuceneTestCase { InstantiatedIndex index = new InstantiatedIndex(); InstantiatedIndexReader reader = new InstantiatedIndexReader(index); - IndexSearcher searcher = newSearcher(reader); + IndexSearcher searcher = newSearcher(reader, false); InstantiatedIndexWriter writer = new InstantiatedIndexWriter(index); Document doc; diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java index d3a06998edc..ae52ace5b9e 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java @@ -34,17 +34,17 @@ public class TestUnoptimizedReaderOnConstructor extends LuceneTestCase { public void test() throws Exception { Directory dir = newDirectory(); - IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocument(iw, "Hello, world!"); addDocument(iw, "All work and no play makes jack a dull boy"); iw.close(); - iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); addDocument(iw, "Hello, tellus!"); addDocument(iw, "All work and no play makes danny a dull boy"); iw.close(); - iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); addDocument(iw, "Hello, earth!"); addDocument(iw, "All work and no play makes wendy a dull girl"); iw.close(); diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 8103b01f30c..947d2eb658b 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -52,7 +52,7 @@ import org.apache.lucene.index.TermPositionVector; import 
org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -807,12 +807,6 @@ public class MemoryIndex { public TermsEnum terms() { return new MemoryTermsEnum(sortedFields[upto].getValue()); } - - @Override - public DocValues docValues() throws IOException { - // TODO - throw new UnsupportedOperationException("not implemented"); - } }; } @@ -848,12 +842,6 @@ public class MemoryIndex { }; } } - - @Override - public DocValues docValues(String field) throws IOException { - // TODO - throw new UnsupportedOperationException("not implemented"); - } }; } @@ -1287,6 +1275,11 @@ public class MemoryIndex { return Collections.unmodifiableSet(fields.keySet()); } + + @Override + public PerDocValues perDocValues() throws IOException { + return null; + } } diff --git a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index 8be566a1c99..197721b2236 100644 --- a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -143,9 +143,9 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { */ private Analyzer randomAnalyzer() { switch(random.nextInt(3)) { - case 0: return new MockAnalyzer(MockTokenizer.SIMPLE, true); - case 1: return new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); - default: return new MockAnalyzer(MockTokenizer.WHITESPACE, false); + case 0: return new MockAnalyzer(random, MockTokenizer.SIMPLE, true); + case 1: return new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + default: return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); } } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index 72d186090fc..a3ad664e05e 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -61,7 +61,7 @@ public class TestFieldNormModifier extends LuceneTestCase { super.setUp(); store = newDirectory(); IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java index 9e4d20fb916..2b8b47dea07 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java @@ -39,7 +39,7 @@ public class TestIndexSplitter extends LuceneTestCase { mergePolicy.setNoCFSRatio(1); IndexWriter iw = new IndexWriter( fsDir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). 
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMergePolicy(mergePolicy) ); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java index 158b24ff58b..776d0c9960d 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java @@ -32,7 +32,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc; for (int i = 0; i < NUM_DOCS; i++) { doc = new Document(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java index 65e6bca1d66..dd79d727835 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java @@ -25,7 +25,7 @@ public class TestTermVectorAccessor extends LuceneTestCase { public void test() throws Exception { Directory dir = newDirectory(); - IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc; diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java index 593c895cf66..125cc1847a9 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -134,10 +134,10 @@ public class TestAppendingCodec extends LuceneTestCase { public void testCodec() throws Exception { Directory dir = new AppendingRAMDirectory(random, new RAMDirectory()); - IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()); + IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer(random)); cfg.setCodecProvider(new AppendingCodecProvider()); - ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false); + ((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, cfg); Document doc = new Document(); doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java 
b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java index cb33cfa8be9..5d6eb8ad8a9 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java @@ -40,7 +40,7 @@ public class TestHighFreqTerms extends LuceneTestCase { public static void setUpClass() throws Exception { dir = newDirectory(); writer = new IndexWriter(dir, newIndexWriterConfig(random, - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) .setMaxBufferedDocs(2)); writer.setInfoStream(VERBOSE ? System.out : null); indexDocs(writer); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index 5bd4ad530f5..ad290c7e490 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -66,7 +66,7 @@ public class TestLengthNormModifier extends LuceneTestCase { super.setUp(); store = newDirectory(); IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java index 6fb01df5736..5a1bf66dab3 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java @@ -39,7 +39,7 @@ public class BooleanFilterTest extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); //Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags addDoc(writer, "admin guest", "010", "20040101","Y"); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java index 29c7f0f2e37..b4a6c8885bb 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java @@ -43,7 +43,7 @@ public class DuplicateFilterTest extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); //Add series of docs with filterable fields : url, text and dates flags addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101"); diff 
--git a/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java index 0f9b6ca7712..5957bf05751 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java @@ -34,13 +34,13 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase { private Directory directory; private IndexSearcher searcher; private IndexReader reader; - private Analyzer analyzer=new MockAnalyzer(); + private Analyzer analyzer=new MockAnalyzer(random); @Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); //Add series of docs with misspelt names addDoc(writer, "jonathon smythe","1"); @@ -121,7 +121,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase { } public void testFuzzyLikeThisQueryEquals() { - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer); fltq1.addTerms("javi", "subject", 0.5f, 2); FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java index 73f666eee10..b261cdea031 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java @@ -36,8 +36,8 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 { RegexpQuery filter = new RegexpQuery(new Term("field", regexp), RegExp.NONE); filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - TopDocs fieldCacheDocs = searcher.search(fieldCache, 25); - TopDocs filterDocs = searcher.search(filter, 25); + TopDocs fieldCacheDocs = searcher1.search(fieldCache, 25); + TopDocs filterDocs = searcher2.search(filter, 25); CheckHits.checkEqual(fieldCache, fieldCacheDocs.scoreDocs, filterDocs.scoreDocs); } diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java index fd32f13abe6..ae7ad5f202c 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java @@ -56,7 +56,7 @@ public class TestSpanRegexQuery extends LuceneTestCase { public void testSpanRegex() throws Exception { Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); // doc.add(newField("field", "the quick brown fox jumps over the lazy dog", // Field.Store.NO, Field.Index.ANALYZED)); @@ -97,14 +97,14 @@ public class TestSpanRegexQuery extends LuceneTestCase { // creating first index 
writer IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); writerA.addDocument(lDoc); writerA.optimize(); writerA.close(); // creating second index writer IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); writerB.addDocument(lDoc2); writerB.optimize(); writerB.close(); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java index 6de5e91ddc5..26b6a191c4b 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java @@ -74,7 +74,7 @@ public class TestMoreLikeThis extends LuceneTestCase { Map originalValues = getOriginalValues(); MoreLikeThis mlt = new MoreLikeThis(reader); - mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); @@ -109,7 +109,7 @@ public class TestMoreLikeThis extends LuceneTestCase { private Map getOriginalValues() throws IOException { Map originalValues = new HashMap(); MoreLikeThis mlt = new MoreLikeThis(reader); - mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java index 745d8f1529c..dcc4811febc 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java @@ -160,7 +160,7 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { /** verify if a node contains a tag */ public boolean containsTag(String tagName) { - return this.tags.containsKey(tagName); + return this.tags.containsKey(tagName.toLowerCase()); } public Object getTag(String tagName) { diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java index f163a4cece5..b8aaae839c7 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java @@ -34,7 +34,7 @@ import org.apache.lucene.util.LuceneTestCase; public class TestComplexPhraseQuery extends LuceneTestCase { Directory rd; - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); DocData docsContent[] = { new DocData("john smith", "1"), new DocData("johathon smith", "2"), diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/nodes/TestQueryNode.java 
b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/nodes/TestQueryNode.java index 23d4fb4ef4c..b805a438ce1 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/nodes/TestQueryNode.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/nodes/TestQueryNode.java @@ -32,4 +32,16 @@ public class TestQueryNode extends LuceneTestCase { bq.add(Arrays.asList(nodeB)); assertEquals(2, bq.getChildren().size()); } + + /* LUCENE-3045 bug in QueryNodeImpl.containsTag(String key)*/ + public void testTags() throws Exception { + QueryNode node = new FieldQueryNode("foo", "A", 0, 1); + + node.setTag("TaG", new Object()); + assertTrue(node.getTagMap().size() > 0); + assertTrue(node.containsTag("tAg")); + assertTrue(node.getTag("tAg") != null); + + } + } diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java index c60a0eb6895..366168feed7 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/ext/TestExtendableQueryParser.java @@ -43,7 +43,7 @@ public class TestExtendableQueryParser extends TestQueryParser { public QueryParser getParser(Analyzer a, Extensions extensions) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); QueryParser qp = extensions == null ? new ExtendableQueryParser( TEST_VERSION_CURRENT, "field", a) : new ExtendableQueryParser( TEST_VERSION_CURRENT, "field", a, extensions); diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java index 5cba05b3111..cd719791f35 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java @@ -125,7 +125,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public PrecedenceQueryParser getParser(Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); PrecedenceQueryParser qp = new PrecedenceQueryParser(); qp.setAnalyzer(a); qp.setDefaultOperator(Operator.OR); @@ -171,7 +171,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public Query getQueryDOA(String query, Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); PrecedenceQueryParser qp = new PrecedenceQueryParser(); qp.setAnalyzer(a); qp.setDefaultOperator(Operator.AND); @@ -232,7 +232,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { "+(title:dog title:cat) -author:\"bob dole\""); PrecedenceQueryParser qp = new PrecedenceQueryParser(); - qp.setAnalyzer(new MockAnalyzer()); + qp.setAnalyzer(new MockAnalyzer(random)); // make sure OR is the default: assertEquals(Operator.OR, qp.getDefaultOperator()); qp.setDefaultOperator(Operator.AND); @@ -246,7 +246,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { } public void testPunct() throws 
Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -266,7 +266,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new MockAnalyzer(); + Analyzer a = new MockAnalyzer(random); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -405,7 +405,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { final String defaultField = "default"; final String monthField = "month"; final String hourField = "hour"; - PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer()); + PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer(random)); Map fieldMap = new HashMap(); // set a field specific date resolution @@ -467,7 +467,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { } public void testEscaped() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("a\\-b:c", a, "a-b:c"); assertQueryEquals("a\\+b:c", a, "a+b:c"); @@ -533,7 +533,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public void testBoost() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on")); - Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); + Analyzer oneStopAnalyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopSet, true); PrecedenceQueryParser qp = new PrecedenceQueryParser(); qp.setAnalyzer(oneStopAnalyzer); @@ -548,7 +548,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { q = qp.parse("\"on\"^1.0", "field"); assertNotNull(q); - q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3", + q = getParser(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3", "field"); assertNotNull(q); } @@ -564,7 +564,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public void testBooleanQuery() throws Exception { BooleanQuery.setMaxClauseCount(2); try { - getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three", "field"); + getParser(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("one two three", "field"); fail("ParseException expected due to too many boolean clauses"); } catch (QueryNodeException expected) { // too many boolean clauses, so ParseException is expected @@ -573,7 +573,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { // LUCENE-792 public void testNOT() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("NOT foo AND bar", a, "-foo +bar"); } @@ -582,7 +582,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { * issue has been corrected. 
*/ public void testPrecedence() throws Exception { - PrecedenceQueryParser parser = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + PrecedenceQueryParser parser = getParser(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); Query query1 = parser.parse("A AND B OR C AND D", "field"); Query query2 = parser.parse("(A AND B) OR (C AND D)", "field"); assertEquals(query1, query2); diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiFieldQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiFieldQPHelper.java index 55e9e183c09..11027b74bdf 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiFieldQPHelper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiFieldQPHelper.java @@ -80,7 +80,7 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { String[] fields = { "b", "t" }; StandardQueryParser mfqp = new StandardQueryParser(); mfqp.setMultiFields(fields); - mfqp.setAnalyzer(new MockAnalyzer()); + mfqp.setAnalyzer(new MockAnalyzer(random)); Query q = mfqp.parse("one", null); assertEquals("b:one t:one", q.toString()); @@ -150,7 +150,7 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { StandardQueryParser mfqp = new StandardQueryParser(); mfqp.setMultiFields(fields); mfqp.setFieldsBoost(boosts); - mfqp.setAnalyzer(new MockAnalyzer()); + mfqp.setAnalyzer(new MockAnalyzer(random)); // Check for simple Query q = mfqp.parse("one", null); @@ -178,24 +178,24 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { public void testStaticMethod1() throws QueryNodeException { String[] fields = { "b", "t" }; String[] queries = { "one", "two" }; - Query q = QueryParserUtil.parse(queries, fields, new MockAnalyzer()); + Query q = QueryParserUtil.parse(queries, fields, new MockAnalyzer(random)); assertEquals("b:one t:two", q.toString()); String[] queries2 = { "+one", "+two" }; - q = QueryParserUtil.parse(queries2, fields, new MockAnalyzer()); + q = QueryParserUtil.parse(queries2, fields, new MockAnalyzer(random)); assertEquals("(+b:one) (+t:two)", q.toString()); String[] queries3 = { "one", "+two" }; - q = QueryParserUtil.parse(queries3, fields, new MockAnalyzer()); + q = QueryParserUtil.parse(queries3, fields, new MockAnalyzer(random)); assertEquals("b:one (+t:two)", q.toString()); String[] queries4 = { "one +more", "+two" }; - q = QueryParserUtil.parse(queries4, fields, new MockAnalyzer()); + q = QueryParserUtil.parse(queries4, fields, new MockAnalyzer(random)); assertEquals("(b:one +b:more) (+t:two)", q.toString()); String[] queries5 = { "blah" }; try { - q = QueryParserUtil.parse(queries5, fields, new MockAnalyzer()); + q = QueryParserUtil.parse(queries5, fields, new MockAnalyzer(random)); fail(); } catch (IllegalArgumentException e) { // expected exception, array length differs @@ -219,15 +219,15 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { BooleanClause.Occur[] flags = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT }; Query q = QueryParserUtil.parse("one", fields, flags, - new MockAnalyzer()); + new MockAnalyzer(random)); assertEquals("+b:one -t:one", q.toString()); - q = QueryParserUtil.parse("one two", fields, flags, new MockAnalyzer()); + q = QueryParserUtil.parse("one two", fields, flags, new MockAnalyzer(random)); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = { BooleanClause.Occur.MUST }; - q = 
QueryParserUtil.parse("blah", fields, flags2, new MockAnalyzer()); + q = QueryParserUtil.parse("blah", fields, flags2, new MockAnalyzer(random)); fail(); } catch (IllegalArgumentException e) { // expected exception, array length differs @@ -240,19 +240,19 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { BooleanClause.Occur.MUST_NOT }; StandardQueryParser parser = new StandardQueryParser(); parser.setMultiFields(fields); - parser.setAnalyzer(new MockAnalyzer()); + parser.setAnalyzer(new MockAnalyzer(random)); Query q = QueryParserUtil.parse("one", fields, flags, - new MockAnalyzer());// , fields, flags, new + new MockAnalyzer(random));// , fields, flags, new // MockAnalyzer()); assertEquals("+b:one -t:one", q.toString()); - q = QueryParserUtil.parse("one two", fields, flags, new MockAnalyzer()); + q = QueryParserUtil.parse("one two", fields, flags, new MockAnalyzer(random)); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = { BooleanClause.Occur.MUST }; - q = QueryParserUtil.parse("blah", fields, flags2, new MockAnalyzer()); + q = QueryParserUtil.parse("blah", fields, flags2, new MockAnalyzer(random)); fail(); } catch (IllegalArgumentException e) { // expected exception, array length differs @@ -265,13 +265,13 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { BooleanClause.Occur[] flags = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD }; Query q = QueryParserUtil.parse(queries, fields, flags, - new MockAnalyzer()); + new MockAnalyzer(random)); assertEquals("+f1:one -f2:two f3:three", q.toString()); try { BooleanClause.Occur[] flags2 = { BooleanClause.Occur.MUST }; q = QueryParserUtil - .parse(queries, fields, flags2, new MockAnalyzer()); + .parse(queries, fields, flags2, new MockAnalyzer(random)); fail(); } catch (IllegalArgumentException e) { // expected exception, array length differs @@ -284,13 +284,13 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { BooleanClause.Occur[] flags = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT }; Query q = QueryParserUtil.parse(queries, fields, flags, - new MockAnalyzer()); + new MockAnalyzer(random)); assertEquals("+b:one -t:two", q.toString()); try { BooleanClause.Occur[] flags2 = { BooleanClause.Occur.MUST }; q = QueryParserUtil - .parse(queries, fields, flags2, new MockAnalyzer()); + .parse(queries, fields, flags2, new MockAnalyzer(random)); fail(); } catch (IllegalArgumentException e) { // expected exception, array length differs @@ -316,7 +316,7 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { } public void testStopWordSearching() throws Exception { - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); Directory ramDir = newDirectory(); IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -342,7 +342,7 @@ public class TestMultiFieldQPHelper extends LuceneTestCase { * Return empty tokens for field "f1". 
*/ private static final class AnalyzerReturningNull extends Analyzer { - MockAnalyzer stdAnalyzer = new MockAnalyzer(); + MockAnalyzer stdAnalyzer = new MockAnalyzer(random); public AnalyzerReturningNull() { } diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java index 563aaf2fd10..e3de2ee0aa3 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java @@ -191,7 +191,7 @@ public class TestQPHelper extends LuceneTestCase { public StandardQueryParser getParser(Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer(a); @@ -281,7 +281,7 @@ public class TestQPHelper extends LuceneTestCase { public Query getQueryDOA(String query, Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer(a); qp.setDefaultOperator(Operator.AND); @@ -301,7 +301,7 @@ public class TestQPHelper extends LuceneTestCase { } public void testConstantScoreAutoRewrite() throws Exception { - StandardQueryParser qp = new StandardQueryParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + StandardQueryParser qp = new StandardQueryParser(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); Query q = qp.parse("foo*bar", "field"); assertTrue(q instanceof WildcardQuery); assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((MultiTermQuery) q).getRewriteMethod()); @@ -410,9 +410,9 @@ public class TestQPHelper extends LuceneTestCase { public void testSimple() throws Exception { assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2"); assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new MockAnalyzer(MockTokenizer.WHITESPACE, false), + assertQueryEquals("türm term term", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), "türm term term"); - assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut"); + assertQueryEquals("ümlaut", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), "ümlaut"); // FIXME: change MockAnalyzer to not extend CharTokenizer for this test //assertQueryEquals("\"\"", new KeywordAnalyzer(), ""); @@ -470,7 +470,7 @@ public class TestQPHelper extends LuceneTestCase { } public void testPunct() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -491,7 +491,7 @@ public class TestQPHelper extends LuceneTestCase { assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -726,7 +726,7 @@ public class TestQPHelper extends
LuceneTestCase { } public void testEscaped() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); /* * assertQueryEquals("\\[brackets", a, "\\[brackets"); @@ -825,7 +825,7 @@ public class TestQPHelper extends LuceneTestCase { } public void testQueryStringEscaping() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); @@ -866,7 +866,7 @@ public class TestQPHelper extends LuceneTestCase { @Ignore("contrib queryparser shouldn't escape wildcard terms") public void testEscapedWildcard() throws Exception { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r")); assertEquals(q, qp.parse("foo\\?ba?r", "field")); @@ -904,7 +904,7 @@ public class TestQPHelper extends LuceneTestCase { public void testBoost() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on")); - Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); + Analyzer oneStopAnalyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopSet, true); StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer(oneStopAnalyzer); @@ -920,7 +920,7 @@ public class TestQPHelper extends LuceneTestCase { assertNotNull(q); StandardQueryParser qp2 = new StandardQueryParser(); - qp2.setAnalyzer(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); + qp2.setAnalyzer(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); q = qp2.parse("the^3", "field"); // "the" is a stop word so the result is an empty query: @@ -950,7 +950,7 @@ public class TestQPHelper extends LuceneTestCase { public void testCustomQueryParserWildcard() { try { - new QPTestParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t", "contents"); + new QPTestParser(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("a?t", "contents"); fail("Wildcard queries should not be allowed"); } catch (QueryNodeException expected) { // expected exception @@ -959,7 +959,7 @@ public class TestQPHelper extends LuceneTestCase { public void testCustomQueryParserFuzzy() throws Exception { try { - new QPTestParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~", "contents"); + new QPTestParser(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("xunit~", "contents"); fail("Fuzzy queries should not be allowed"); } catch (QueryNodeException expected) { // expected exception @@ -970,7 +970,7 @@ public class TestQPHelper extends LuceneTestCase { BooleanQuery.setMaxClauseCount(2); try { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); qp.parse("one two three", "field"); fail("ParseException expected due to too many boolean clauses"); @@ -984,7 +984,7 @@ public class TestQPHelper extends LuceneTestCase { */ public void testPrecedence() throws Exception { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new 
MockAnalyzer(MockTokenizer.WHITESPACE, false)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); Query query1 = qp.parse("A AND B OR C AND D", "field"); Query query2 = qp.parse("+A +B +C +D", "field"); @@ -995,7 +995,7 @@ public class TestQPHelper extends LuceneTestCase { // Todo: Convert from DateField to DateUtil // public void testLocalDateFormat() throws IOException, QueryNodeException { // Directory ramDir = newDirectory(); -// IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); +// IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); // addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); // addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); // iw.close(); @@ -1116,7 +1116,7 @@ public class TestQPHelper extends LuceneTestCase { public void testStopwords() throws Exception { StandardQueryParser qp = new StandardQueryParser(); CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton()); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopSet, true)); Query result = qp.parse("a:the OR a:foo", "a"); assertNotNull("result is null and it shouldn't be", result); @@ -1140,7 +1140,7 @@ public class TestQPHelper extends LuceneTestCase { public void testPositionIncrement() throws Exception { StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer( - new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); + new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); qp.setEnablePositionIncrements(true); @@ -1161,7 +1161,7 @@ public class TestQPHelper extends LuceneTestCase { public void testMatchAllDocs() throws Exception { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); assertEquals(new MatchAllDocsQuery(), qp.parse("*:*", "field")); assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)", "field")); @@ -1173,7 +1173,7 @@ public class TestQPHelper extends LuceneTestCase { private void assertHits(int expected, String query, IndexSearcher is) throws IOException, QueryNodeException { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + qp.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); qp.setLocale(Locale.ENGLISH); Query q = qp.parse(query, "date"); diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java index f526d07d5c9..dffb925ed6b 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java @@ -41,7 +41,7 @@ public class SingleFieldTestDb { fieldName = fName; IndexWriter writer = new IndexWriter(db, new IndexWriterConfig( Version.LUCENE_CURRENT, - new MockAnalyzer())); + new MockAnalyzer(random))); for (int j = 0; j < docs.length; j++) { Document d = new Document(); d.add(new Field(fieldName, docs[j], Field.Store.NO, 
Field.Index.ANALYZED)); diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java index 3f417f40844..06bb23fa35e 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java @@ -71,7 +71,7 @@ public class TestCartesian extends LuceneTestCase { super.setUp(); directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); setUpPlotter( 2, 15); diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java index 7aaa919a335..b23d3b382e6 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java @@ -47,7 +47,7 @@ public class TestDistance extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addData(writer); } diff --git a/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java b/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java index a4ed8407f2f..bfeae31581e 100755 --- a/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -29,7 +29,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Terms; @@ -45,7 +45,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Version; -import org.apache.lucene.util.VirtualMethod; /** *
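// --- Illustrative sketch (editor's note, not part of the patch) ---------------
// The SpellChecker hunk just below swaps the LogMergePolicy cast for
// TieredMergePolicy and replaces setMergeFactor(mergeFactor) with
// setMaxMergeAtOnce(mergeFactor), matching the new default merge policy on trunk.
// The helper below only sketches that pattern under the trunk APIs visible in
// this patch; the class name, method name and the (dir, analyzer, ramMB,
// mergeFactor) parameters are assumptions made for illustration.
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

final class MergeFactorTuningSketch {
  static IndexWriter openTunedWriter(Directory dir, Analyzer analyzer,
                                     double ramMB, int mergeFactor) throws IOException {
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_CURRENT, analyzer).setRAMBufferSizeMB(ramMB));
    MergePolicy mp = writer.getConfig().getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      // TieredMergePolicy has no mergeFactor; maxMergeAtOnce is the closest knob.
      ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeFactor);
    } else if (mp instanceof LogMergePolicy) {
      // An explicitly configured LogMergePolicy still uses mergeFactor.
      ((LogMergePolicy) mp).setMergeFactor(mergeFactor);
    }
    return writer;
  }
}
// -------------------------------------------------------------------------------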

@@ -508,7 +507,7 @@ public class SpellChecker implements java.io.Closeable { ensureOpen(); final Directory dir = this.spellIndex; final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(mergeFactor); + ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor); IndexSearcher indexSearcher = obtainSearcher(); final List termsEnums = new ArrayList(); diff --git a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java index ff9975d7381..3de4a91959a 100644 --- a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java +++ b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java @@ -35,7 +35,7 @@ public class TestDirectSpellChecker extends LuceneTestCase { spellChecker.setMinQueryLength(0); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - new MockAnalyzer(MockTokenizer.SIMPLE, true)); + new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); for (int i = 0; i < 20; i++) { Document doc = new Document(); @@ -93,7 +93,7 @@ public class TestDirectSpellChecker extends LuceneTestCase { public void testOptions() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - new MockAnalyzer(MockTokenizer.SIMPLE, true)); + new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); Document doc = new Document(); doc.add(newField("text", "foobar", Field.Store.NO, Field.Index.ANALYZED)); diff --git a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java index 943b6d6daf0..e5cc7684f6d 100644 --- a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java +++ b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java @@ -46,7 +46,7 @@ public class TestLuceneDictionary extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); store = newDirectory(); - IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); Document doc; diff --git a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java index ad753068edc..85313a0a811 100755 --- a/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java +++ b/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java @@ -54,7 +54,7 @@ public class TestSpellChecker extends LuceneTestCase { //create a user index userindex = newDirectory(); IndexWriter writer = new IndexWriter(userindex, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 1000; i++) { Document doc = new Document(); diff --git 
a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java index 437b7e98973..abe77850bd5 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java @@ -36,7 +36,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -250,7 +250,7 @@ public class Syns2Index // override the specific index if it already exists IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why? + ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why? Iterator i1 = word2Nums.keySet().iterator(); while (i1.hasNext()) // for each word { diff --git a/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java b/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java index 52171479992..ccd855931a5 100644 --- a/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java +++ b/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestWordnet extends LuceneTestCase { private IndexSearcher searcher; @@ -42,6 +43,7 @@ public class TestWordnet extends LuceneTestCase { // create a temporary synonym index File testFile = getDataFile("testSynonyms.txt"); String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName }; + _TestUtil.rmDir(new File(storePathName)); try { Syns2Index.main(commandLineArgs); @@ -61,7 +63,7 @@ public class TestWordnet extends LuceneTestCase { private void assertExpandsTo(String term, String expected[]) throws IOException { Query expandedQuery = SynExpand.expand(term, searcher, new - MockAnalyzer(), "field", 1F); + MockAnalyzer(random), "field", 1F); BooleanQuery expectedQuery = new BooleanQuery(); for (String t : expected) expectedQuery.add(new TermQuery(new Term("field", t)), @@ -71,8 +73,12 @@ public class TestWordnet extends LuceneTestCase { @Override public void tearDown() throws Exception { - searcher.close(); - dir.close(); + if (searcher != null) { + searcher.close(); + } + if (dir != null) { + dir.close(); + } rmDir(storePathName); // delete our temporary synonym index super.tearDown(); } diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java index ae5f02be532..ffe82630b65 100644 --- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java +++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java @@ -49,7 +49,7 @@ public class TestParser extends LuceneTestCase { @BeforeClass public static void beforeClass() throws Exception { // TODO: 
rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT): - Analyzer analyzer=new MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false); + Analyzer analyzer=new MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false); //initialize the parser builder=new CorePlusExtensionsParser("contents",analyzer); @@ -187,7 +187,8 @@ public class TestParser extends LuceneTestCase { } public void testDuplicateFilterQueryXML() throws ParserException, IOException { - Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders().length == 1); + Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1); Query q=parse("DuplicateFilterQuery.xml"); int h = searcher.search(q, null, 1000).totalHits; assertEquals("DuplicateFilterQuery should produce 1 result ", 1,h); diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java index 9d87f8ae03f..f5cb65d9901 100644 --- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java +++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java @@ -44,7 +44,7 @@ import org.xml.sax.SAXException; public class TestQueryTemplateManager extends LuceneTestCase { CoreParser builder; - Analyzer analyzer=new MockAnalyzer(); + Analyzer analyzer=new MockAnalyzer(random); private IndexSearcher searcher; private Directory dir; diff --git a/lucene/lib/ant-junit-LICENSE.txt b/lucene/lib/ant-junit-LICENSE-ASL.txt similarity index 100% rename from lucene/lib/ant-junit-LICENSE.txt rename to lucene/lib/ant-junit-LICENSE-ASL.txt diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java index c72a1f6b0a3..ae544cbaf86 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java @@ -72,13 +72,18 @@ class BufferedDeletes { public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); - final AtomicLong bytesUsed = new AtomicLong(); + final AtomicLong bytesUsed; private final static boolean VERBOSE_DELETES = false; long gen; - public BufferedDeletes(boolean sortTerms) { + this(sortTerms, new AtomicLong()); + } + + BufferedDeletes(boolean sortTerms, AtomicLong bytesUsed) { + assert bytesUsed != null; + this.bytesUsed = bytesUsed; if (sortTerms) { terms = new TreeMap(); } else { diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java index 692496ba406..11e55734046 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -33,8 +33,8 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; -/* Tracks the stream of {@link BuffereDeletes}. - * When DocumensWriter flushes, its buffered +/* Tracks the stream of {@link BufferedDeletes}. + * When DocumentsWriterPerThread flushes, its buffered * deletes are appended to this stream. 
We later * apply these deletes (resolve them to the actual * docIDs, per segment) when a merge is started @@ -60,7 +60,7 @@ class BufferedDeletesStream { // used only by assert private Term lastDeleteTerm; - + private PrintStream infoStream; private final AtomicLong bytesUsed = new AtomicLong(); private final AtomicInteger numTerms = new AtomicInteger(); @@ -75,26 +75,36 @@ class BufferedDeletesStream { infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } } - + public synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; } // Appends a new packet of buffered deletes to the stream, // setting its generation: - public synchronized void push(FrozenBufferedDeletes packet) { + public synchronized long push(FrozenBufferedDeletes packet) { + /* + * The insert operation must be atomic. If we let threads increment the gen + * and push the packet afterwards we risk that packets are out of order. + * With DWPT this is possible if two or more flushes are racing for pushing + * updates. If the pushed packets get our of order would loose documents + * since deletes are applied to the wrong segments. + */ + packet.setDelGen(nextGen++); assert packet.any(); - assert checkDeleteStats(); - assert packet.gen < nextGen; + assert checkDeleteStats(); + assert packet.delGen() < nextGen; + assert deletes.isEmpty() || deletes.get(deletes.size()-1).delGen() < packet.delGen() : "Delete packets must be in order"; deletes.add(packet); numTerms.addAndGet(packet.numTermDeletes); bytesUsed.addAndGet(packet.bytesUsed); if (infoStream != null) { - message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size()); + message("push deletes " + packet + " delGen=" + packet.delGen() + " packetCount=" + deletes.size()); } - assert checkDeleteStats(); + assert checkDeleteStats(); + return packet.delGen(); } - + public synchronized void clear() { deletes.clear(); nextGen = 1; @@ -132,7 +142,7 @@ class BufferedDeletesStream { } // Sorts SegmentInfos from smallest to biggest bufferedDelGen: - private static final Comparator sortByDelGen = new Comparator() { + private static final Comparator sortSegInfoByDelGen = new Comparator() { // @Override -- not until Java 1.6 public int compare(SegmentInfo si1, SegmentInfo si2) { final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen(); @@ -147,10 +157,10 @@ class BufferedDeletesStream { @Override public boolean equals(Object other) { - return sortByDelGen == other; + return sortSegInfoByDelGen == other; } }; - + /** Resolves the buffered deleted Term/Query/docIDs, into * actual deleted docIDs in the deletedDocs BitVector for * each SegmentReader. 
*/ @@ -174,7 +184,7 @@ class BufferedDeletesStream { SegmentInfos infos2 = new SegmentInfos(); infos2.addAll(infos); - Collections.sort(infos2, sortByDelGen); + Collections.sort(infos2, sortSegInfoByDelGen); BufferedDeletes coalescedDeletes = null; boolean anyNewDeletes = false; @@ -191,19 +201,30 @@ class BufferedDeletesStream { final SegmentInfo info = infos2.get(infosIDX); final long segGen = info.getBufferedDeletesGen(); - if (packet != null && segGen < packet.gen) { + if (packet != null && segGen < packet.delGen()) { //System.out.println(" coalesce"); if (coalescedDeletes == null) { coalescedDeletes = new BufferedDeletes(true); } - coalescedDeletes.update(packet); + if (!packet.isSegmentPrivate) { + /* + * Only coalesce if we are NOT on a segment private del packet: the segment private del packet + * must only applied to segments with the same delGen. Yet, if a segment is already deleted + * from the SI since it had no more documents remaining after some del packets younger than + * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been + * removed. + */ + coalescedDeletes.update(packet); + } + delIDX--; - } else if (packet != null && segGen == packet.gen) { + } else if (packet != null && segGen == packet.delGen()) { + assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet"; //System.out.println(" eq"); // Lock order: IW -> BD -> RP assert readerPool.infoIsLive(info); - SegmentReader reader = readerPool.get(info, false); + final SegmentReader reader = readerPool.get(info, false); int delCount = 0; final boolean segAllDeletes; try { @@ -213,7 +234,7 @@ class BufferedDeletesStream { delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); } //System.out.println(" del exact"); - // Don't delete by Term here; DocumentsWriter + // Don't delete by Term here; DocumentsWriterPerThread // already did that on flush: delCount += applyQueryDeletes(packet.queriesIterable(), reader); segAllDeletes = reader.numDocs() == 0; @@ -236,7 +257,12 @@ class BufferedDeletesStream { if (coalescedDeletes == null) { coalescedDeletes = new BufferedDeletes(true); } - coalescedDeletes.update(packet); + + /* + * Since we are on a segment private del packet we must not + * update the coalescedDeletes here! We can simply advance to the + * next packet and seginfo. 
+ */ delIDX--; infosIDX--; info.setBufferedDeletesGen(nextGen); @@ -281,11 +307,11 @@ class BufferedDeletesStream { message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec"); } // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; - + return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted); } - public synchronized long getNextGen() { + synchronized long getNextGen() { return nextGen++; } @@ -303,10 +329,9 @@ class BufferedDeletesStream { if (infoStream != null) { message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size()); } - final int limit = deletes.size(); for(int delIDX=0;delIDX= minGen) { + if (deletes.get(delIDX).delGen() >= minGen) { prune(delIDX); assert checkDeleteStats(); return; @@ -345,10 +370,10 @@ class BufferedDeletesStream { } TermsEnum termsEnum = null; - + String currentField = null; DocsEnum docs = null; - + assert checkDeleteTerm(null); for (Term term : termsIter) { @@ -372,10 +397,10 @@ class BufferedDeletesStream { assert checkDeleteTerm(term); // System.out.println(" term=" + term); - + if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); - + if (docsEnum != null) { while (true) { final int docID = docsEnum.nextDoc(); @@ -401,7 +426,7 @@ class BufferedDeletesStream { public final Query query; public final int limit; public QueryAndLimit(Query query, int limit) { - this.query = query; + this.query = query; this.limit = limit; } } @@ -449,7 +474,7 @@ class BufferedDeletesStream { lastDeleteTerm = term; return true; } - + // only for assert private boolean checkDeleteStats() { int numTerms2 = 0; diff --git a/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java b/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java index 5355dee4f83..5c8b921f087 100644 --- a/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java +++ b/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java @@ -81,6 +81,6 @@ final class ByteSliceWriter extends DataOutput { } public int getAddress() { - return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); + return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK); } } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index ca8f357aba2..61b3fc07da0 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -661,10 +661,13 @@ public class CheckIndex { status.termCount++; final DocsEnum docs2; + final boolean hasPositions; if (postings != null) { docs2 = postings; + hasPositions = true; } else { docs2 = docs; + hasPositions = false; } int lastDoc = -1; @@ -733,6 +736,67 @@ public class CheckIndex { throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq); } } + + // Test skipping + if (docFreq >= 16) { + if (hasPositions) { + for(int idx=0;idx<7;idx++) { + final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8); + postings = terms.docsAndPositions(delDocs, postings); + final int docID = postings.advance(skipDocID); + if (docID == DocsEnum.NO_MORE_DOCS) { + break; + } else { + if (docID < skipDocID) { + throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID); + } + final int freq = postings.freq(); + if (freq 
<= 0) { + throw new RuntimeException("termFreq " + freq + " is out of bounds"); + } + int lastPosition = -1; + for(int posUpto=0;posUpto threads, final SegmentWriteState state) throws IOException; + abstract void processDocument(FieldInfos fieldInfos) throws IOException; + abstract void finishDocument() throws IOException; + abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract boolean freeRAM(); + abstract void doAfterFlush(); } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java index 2abc0bb5531..18555300003 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java @@ -18,22 +18,25 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class DocFieldConsumer { - /** Called when DocumentsWriter decides to create a new + /** Called when DocumentsWriterPerThread decides to create a new * segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ abstract void abort(); - /** Add a new thread */ - abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException; - - /** Called when DocumentsWriter is using too much RAM. + /** Called when DocumentsWriterPerThread is using too much RAM. * The consumer should free RAM, if possible, returning * true if any RAM was in fact freed. */ abstract boolean freeRAM(); - } + + abstract void startDocument() throws IOException; + + abstract DocFieldConsumerPerField addField(FieldInfo fi); + + abstract void finishDocument() throws IOException; + +} diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java index f70e815d8d5..960ea59eae8 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java @@ -24,4 +24,5 @@ abstract class DocFieldConsumerPerField { /** Processes all occurrences of a single field */ abstract void processFields(Fieldable[] fields, int count) throws IOException; abstract void abort(); + abstract FieldInfo getFieldInfo(); } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java new file mode 100644 index 00000000000..3d20248ff61 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java @@ -0,0 +1,90 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** This is just a "splitter" class: it lets you wrap two + * DocFieldConsumer instances as a single consumer. */ + +final class DocFieldConsumers extends DocFieldConsumer { + final DocFieldConsumer one; + final DocFieldConsumer two; + final DocumentsWriterPerThread.DocState docState; + + public DocFieldConsumers(DocFieldProcessor processor, DocFieldConsumer one, DocFieldConsumer two) { + this.one = one; + this.two = two; + this.docState = processor.docState; + } + + @Override + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { + + Map oneFieldsToFlush = new HashMap(); + Map twoFieldsToFlush = new HashMap(); + + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldToFlush.getValue(); + oneFieldsToFlush.put(fieldToFlush.getKey(), perField.one); + twoFieldsToFlush.put(fieldToFlush.getKey(), perField.two); + } + + one.flush(oneFieldsToFlush, state); + two.flush(twoFieldsToFlush, state); + } + + @Override + public void abort() { + try { + one.abort(); + } finally { + two.abort(); + } + } + + @Override + public boolean freeRAM() { + boolean any = one.freeRAM(); + any |= two.freeRAM(); + return any; + } + + @Override + public void finishDocument() throws IOException { + try { + one.finishDocument(); + } finally { + two.finishDocument(); + } + } + + @Override + public void startDocument() throws IOException { + one.startDocument(); + two.startDocument(); + } + + @Override + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocFieldConsumersPerField(this, fi, one.addField(fi), two.addField(fi)); + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java similarity index 52% rename from lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java rename to lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java index 87af8608174..5abf003d5a1 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java @@ -17,29 +17,40 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -final class FreqProxTermsWriterPerThread extends TermsHashConsumerPerThread { - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; +import java.io.IOException; +import org.apache.lucene.document.Fieldable; - public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) { - docState = perThread.docState; - termsHashPerThread = perThread; - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); +final class DocFieldConsumersPerField extends DocFieldConsumerPerField { + + final DocFieldConsumerPerField one; + final DocFieldConsumerPerField two; + final DocFieldConsumers parent; + final FieldInfo fieldInfo; + + public DocFieldConsumersPerField(DocFieldConsumers parent, FieldInfo fi, DocFieldConsumerPerField one, DocFieldConsumerPerField two) { + this.parent = parent; + this.one = one; + this.two = two; + this.fieldInfo = fi; } @Override - void startDocument() { + public void processFields(Fieldable[] fields, int count) throws IOException { + one.processFields(fields, count); + two.processFields(fields, count); } @Override - DocumentsWriter.DocWriter finishDocument() { - return null; + public void abort() { + try { + one.abort(); + } finally { + two.abort(); + } } @Override - public void abort() {} + FieldInfo getFieldInfo() { + return fieldInfo; + } } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index a566e72f9bb..3f7faf62c20 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -20,13 +20,20 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; -import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.DocumentsWriterPerThread.DocState; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.store.Directory; + /** * This is a DocConsumer that gathers all fields under the * same name, and calls per-field consumers to process field @@ -37,66 +44,39 @@ import org.apache.lucene.store.Directory; final class DocFieldProcessor extends DocConsumer { - final DocumentsWriter docWriter; final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - final private Map docValues = new HashMap(); - private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter - private SegmentWriteState docValuesConsumerState; // TODO this should be encapsulated in DocumentsWriter + // Holds all fields seen in current doc + DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; + int fieldCount; - synchronized DocValuesConsumer docValuesConsumer(Directory dir, - String segment, String name, PerDocFieldValues values, FieldInfo fieldInfo) - throws IOException { - DocValuesConsumer valuesConsumer; - if ((valuesConsumer = docValues.get(name)) == null) { - fieldInfo.setDocValues(values.type()); + // Hash table for all fields ever seen + 
DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; + int hashMask = 1; + int totalFieldCount; - if(fieldsConsumer == null) { - /* TODO (close to no commit) -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice. - * we need to find a way that allows us to obtain a FieldsConsumer per DocumentsWriter. Currently some codecs rely on - * the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier - * to support docvalues and later on stored fields too. - */ - docValuesConsumerState = docWriter.segWriteState(false); - fieldsConsumer = docValuesConsumerState.segmentCodecs.codec().fieldsConsumer(docValuesConsumerState); - } - valuesConsumer = fieldsConsumer.addValuesField(fieldInfo); - docValues.put(name, valuesConsumer); - } - return valuesConsumer; + float docBoost; + int fieldGen; + final DocumentsWriterPerThread.DocState docState; - } - - - public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { - this.docWriter = docWriter; + public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) { + this.docState = docWriter.docState; this.consumer = consumer; fieldsWriter = new StoredFieldsWriter(docWriter); } @Override - public void flush(Collection threads, SegmentWriteState state) throws IOException { + public void flush(SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - for ( DocConsumerPerThread thread : threads) { - DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread; - childThreadsAndFields.put(perThread.consumer, perThread.fields()); + Map childFields = new HashMap(); + Collection fields = fields(); + for (DocFieldConsumerPerField f : fields) { + childFields.put(f.getFieldInfo(), f); } + fieldsWriter.flush(state); - consumer.flush(childThreadsAndFields, state); - - for(DocValuesConsumer p : docValues.values()) { - if (p != null) { - p.finish(state.numDocs); - } - } - docValues.clear(); - if(fieldsConsumer != null) { - fieldsConsumer.close(); // TODO remove this once docvalues are fully supported by codecs - docValuesConsumerState = null; - fieldsConsumer = null; - } + consumer.flush(childFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, @@ -104,12 +84,35 @@ final class DocFieldProcessor extends DocConsumer { // FieldInfo.storePayload. final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION); state.fieldInfos.write(state.directory, fileName); + for (DocValuesConsumer consumers : docValues.values()) { + consumers.finish(state.numDocs); + }; } @Override public void abort() { - fieldsWriter.abort(); - consumer.abort(); + for(int i=0;i fields() { + Collection fields = new HashSet(); + for(int i=0;i fieldHash.length; + + final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; + + // Rehash + int newHashMask = newHashSize-1; + for(int j=0;j docFields = doc.getFields(); + final int numDocFields = docFields.size(); + + // Absorb any new fields first seen in this document. 
+ // Also absorb any changes to fields we had already + // seen before (eg suddenly turning on norms or + // vectors, etc.): + + for(int i=0;i= fieldHash.length/2) + rehash(); + } else { + fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), + field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), + field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType()); + } + + if (thisFieldGen != fp.lastGen) { + + // First time we're seeing this field for this doc + fp.fieldCount = 0; + + if (fieldCount == fields.length) { + final int newSize = fields.length*2; + DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = fp; + fp.lastGen = thisFieldGen; + } + + fp.addField(field); + + if (field.isStored()) { + fieldsWriter.addField(field, fp.fieldInfo); + } + if (field.hasDocValues()) { + final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos); + docValuesConsumer.add(docState.docID, field.getDocValues()); + } + } + + // If we are writing vectors then we must visit + // fields in sorted order so they are written in + // sorted order. TODO: we actually only need to + // sort the subset of fields that have vectors + // enabled; we could save [small amount of] CPU + // here. + quickSort(fields, 0, fieldCount-1); + + for(int i=0;i= hi) + return; + else if (hi == 1+lo) { + if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + final DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[hi]; + array[hi] = tmp; + } + return; + } + + int mid = (lo + hi) >>> 1; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return; + + DocFieldProcessorPerField partition = array[mid]; + + for (; ;) { + while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) + --right; + + while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) + ++left; + + if (left < right) { + DocFieldProcessorPerField tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } else { + break; + } + } + + quickSort(array, lo, left); + quickSort(array, left + 1, hi); + } + final private Map docValues = new HashMap(); + final private Map perDocConsumers = new HashMap(); + + DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos) + throws IOException { + DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name); + if (docValuesConsumer != null) { + return docValuesConsumer; + } + PerDocConsumer perDocConsumer = perDocConsumers.get(fieldInfo.getCodecId()); + if (perDocConsumer == null) { + PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId()); + SegmentCodecs codecs = perDocWriteState.segmentCodecs; + assert codecs.codecs.length > fieldInfo.getCodecId(); + + Codec codec = 
codecs.codecs[fieldInfo.getCodecId()]; + perDocConsumer = codec.docsConsumer(perDocWriteState); + perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer); + } + docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); + docValues.put(fieldInfo.name, docValuesConsumer); + return docValuesConsumer; + } + } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java index 8fb1da45280..36b1908f6d3 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java @@ -18,6 +18,8 @@ package org.apache.lucene.index; */ import org.apache.lucene.document.Fieldable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; /** * Holds all per thread, per field state. @@ -34,11 +36,22 @@ final class DocFieldProcessorPerField { int fieldCount; Fieldable[] fields = new Fieldable[1]; - public DocFieldProcessorPerField(final DocFieldProcessorPerThread perThread, final FieldInfo fieldInfo) { - this.consumer = perThread.consumer.addField(fieldInfo); + public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) { + this.consumer = docFieldProcessor.consumer.addField(fieldInfo); this.fieldInfo = fieldInfo; } + public void addField(Fieldable field) { + if (fieldCount == fields.length) { + int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = field; + } + public void abort() { consumer.abort(); } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java deleted file mode 100644 index e69424be90d..00000000000 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java +++ /dev/null @@ -1,320 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Comparator; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.io.IOException; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; -import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -/** - * Gathers all Fieldables for a document under the same - * name, updates FieldInfos, and calls per-field consumers - * to process field by field. - * - * Currently, only a single thread visits the fields, - * sequentially, for processing. - */ - -final class DocFieldProcessorPerThread extends DocConsumerPerThread { - - float docBoost; - int fieldGen; - final DocFieldProcessor docFieldProcessor; - final DocFieldConsumerPerThread consumer; - - // Holds all fields seen in current doc - DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; - int fieldCount; - - // Hash table for all fields seen in current segment - DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; - int hashMask = 1; - int totalFieldCount; - - final StoredFieldsWriterPerThread fieldsWriter; - - final DocumentsWriter.DocState docState; - - public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException { - this.docState = threadState.docState; - this.docFieldProcessor = docFieldProcessor; - this.consumer = docFieldProcessor.consumer.addThread(this); - fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState); - } - - @Override - public void abort() { - for(int i=0;i fields() { - Collection fields = new HashSet(); - for(int i=0;i fieldHash.length; - - final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; - - // Rehash - int newHashMask = newHashSize-1; - for(int j=0;j docFields = doc.getFields(); - final int numDocFields = docFields.size(); - - // Absorb any new fields first seen in this document. - // Also absorb any changes to fields we had already - // seen before (eg suddenly turning on norms or - // vectors, etc.): - - for(int i=0;i= fieldHash.length/2) - rehash(); - } else { - fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), - field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType()); - } - if (thisFieldGen != fp.lastGen) { - - // First time we're seeing this field for this doc - fp.fieldCount = 0; - - if (fieldCount == fields.length) { - final int newSize = fields.length*2; - DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; - System.arraycopy(fields, 0, newArray, 0, fieldCount); - fields = newArray; - } - - fields[fieldCount++] = fp; - fp.lastGen = thisFieldGen; - } - - if (fp.fieldCount == fp.fields.length) { - Fieldable[] newArray = new Fieldable[fp.fields.length*2]; - System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount); - fp.fields = newArray; - } - - fp.fields[fp.fieldCount++] = field; - if (field.isStored()) { - fieldsWriter.addField(field, fp.fieldInfo); - } - } - - // If we are writing vectors then we must visit - // fields in sorted order so they are written in - // sorted order. TODO: we actually only need to - // sort the subset of fields that have vectors - // enabled; we could save [small amount of] CPU - // here. 
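
Both the new DocFieldProcessor and the DocFieldProcessorPerThread it replaces keep per-field state in a small hash table keyed by field name: a bucket is chosen by masking the name's hash with hashMask, collisions are chained through a next pointer, and the table is doubled and rehashed once it is roughly half full. A condensed, self-contained sketch of that bookkeeping, with field names only and the FieldInfo/consumer wiring omitted:

```java
// Minimal chained hash table keyed by field name, mirroring the
// fieldHash/hashMask/rehash bookkeeping shown above (simplified sketch).
final class FieldTable {
  static final class Entry {
    final String name;
    Entry next;                 // collision chain within a bucket
    Entry(String name) { this.name = name; }
  }

  private Entry[] buckets = new Entry[2];
  private int mask = 1;         // always buckets.length - 1 (power of two)
  private int count;

  Entry getOrAdd(String name) {
    final int slot = name.hashCode() & mask;
    Entry e = buckets[slot];
    while (e != null && !e.name.equals(name)) {
      e = e.next;
    }
    if (e == null) {
      e = new Entry(name);
      e.next = buckets[slot];   // insert at head of the chain
      buckets[slot] = e;
      if (++count >= buckets.length / 2) {
        rehash();               // keep the load factor below 1/2
      }
    }
    return e;
  }

  private void rehash() {
    final Entry[] newBuckets = new Entry[buckets.length * 2];
    final int newMask = newBuckets.length - 1;
    for (Entry bucket : buckets) {
      Entry e = bucket;
      while (e != null) {
        final Entry next = e.next;                    // detach before relinking
        final int slot = e.name.hashCode() & newMask;
        e.next = newBuckets[slot];
        newBuckets[slot] = e;
        e = next;
      }
    }
    buckets = newBuckets;
    mask = newMask;
  }
}
```
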
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); - - - for(int i=0;i fieldsComp = new Comparator() { - public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { - return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); - } - }; - - PerDoc[] docFreeList = new PerDoc[1]; - int freeCount; - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else - return docFreeList[--freeCount]; - } - - synchronized void freePerDoc(PerDoc perDoc) { - assert freeCount < docFreeList.length; - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - - DocumentsWriter.DocWriter one; - DocumentsWriter.DocWriter two; - - @Override - public long sizeInBytes() { - return one.sizeInBytes() + two.sizeInBytes(); - } - - @Override - public void finish() throws IOException { - try { - try { - one.finish(); - } finally { - two.finish(); - } - } finally { - freePerDoc(this); - } - } - - @Override - public void abort() { - try { - try { - one.abort(); - } finally { - two.abort(); - } - } finally { - freePerDoc(this); - } - } - } -} diff --git a/lucene/src/java/org/apache/lucene/index/DocInverter.java b/lucene/src/java/org/apache/lucene/index/DocInverter.java index 48e8edfb2ba..95c09763fad 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverter.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverter.java @@ -18,12 +18,13 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; - import java.util.Map; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.AttributeSource; + /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a @@ -34,42 +35,72 @@ final class DocInverter extends DocFieldConsumer { final InvertedDocConsumer consumer; final InvertedDocEndConsumer endConsumer; - public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + final DocumentsWriterPerThread.DocState docState; + + final FieldInvertState fieldState = new FieldInvertState(); + + final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); + + static class SingleTokenAttributeSource extends AttributeSource { + final CharTermAttribute termAttribute; + final OffsetAttribute offsetAttribute; + + private SingleTokenAttributeSource() { + termAttribute = addAttribute(CharTermAttribute.class); + offsetAttribute = addAttribute(OffsetAttribute.class); + } + + public void reinit(String stringValue, int startOffset, int endOffset) { + termAttribute.setEmpty().append(stringValue); + offsetAttribute.setOffset(startOffset, endOffset); + } + } + + // Used to read a string value for a field + final ReusableStringReader stringReader = new ReusableStringReader(); + + public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + this.docState = docState; this.consumer = consumer; this.endConsumer = endConsumer; } @Override - void flush(Map> 
threadsAndFields, SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> endChildThreadsAndFields = new HashMap>(); + Map childFieldsToFlush = new HashMap(); + Map endChildFieldsToFlush = new HashMap(); - for (Map.Entry> entry : threadsAndFields.entrySet() ) { - - - DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey(); - - Collection childFields = new HashSet(); - Collection endChildFields = new HashSet(); - for (final DocFieldConsumerPerField field: entry.getValue() ) { - DocInverterPerField perField = (DocInverterPerField) field; - childFields.add(perField.consumer); - endChildFields.add(perField.endConsumer); - } - - childThreadsAndFields.put(perThread.consumer, childFields); - endChildThreadsAndFields.put(perThread.endConsumer, endChildFields); + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue(); + childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer); + endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer); } - - consumer.flush(childThreadsAndFields, state); - endConsumer.flush(endChildThreadsAndFields, state); + + consumer.flush(childFieldsToFlush, state); + endConsumer.flush(endChildFieldsToFlush, state); + } + + @Override + public void startDocument() throws IOException { + consumer.startDocument(); + endConsumer.startDocument(); + } + + public void finishDocument() throws IOException { + // TODO: allow endConsumer.finishDocument to also return + // a DocWriter + endConsumer.finishDocument(); + consumer.finishDocument(); } @Override void abort() { - consumer.abort(); - endConsumer.abort(); + try { + consumer.abort(); + } finally { + endConsumer.abort(); + } } @Override @@ -78,7 +109,8 @@ final class DocInverter extends DocFieldConsumer { } @Override - public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) { - return new DocInverterPerThread(docFieldProcessorPerThread, this); + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocInverterPerField(this, fi); } + } diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java index d360fbfb230..2463326295c 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -35,20 +35,20 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; final class DocInverterPerField extends DocFieldConsumerPerField { - final private DocInverterPerThread perThread; - final private FieldInfo fieldInfo; + final private DocInverter parent; + final FieldInfo fieldInfo; final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; - public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) { - this.perThread = perThread; + public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) { + this.parent = parent; this.fieldInfo = fieldInfo; - docState = perThread.docState; - fieldState = perThread.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); - this.endConsumer = perThread.endConsumer.addField(this, fieldInfo); + docState = 
parent.docState; + fieldState = parent.fieldState; + this.consumer = parent.consumer.addField(this, fieldInfo); + this.endConsumer = parent.endConsumer.addField(this, fieldInfo); } @Override @@ -80,8 +80,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField { if (!field.isTokenized()) { // un-tokenized field String stringValue = field.stringValue(); final int valueLength = stringValue.length(); - perThread.singleToken.reinit(stringValue, 0, valueLength); - fieldState.attributeSource = perThread.singleToken; + parent.singleToken.reinit(stringValue, 0, valueLength); + fieldState.attributeSource = parent.singleToken; consumer.start(field); boolean success = false; @@ -89,8 +89,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField { consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.offset += valueLength; fieldState.length++; @@ -114,8 +115,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField { if (stringValue == null) { throw new IllegalArgumentException("field must have either TokenStream, String or Reader value"); } - perThread.stringReader.init(stringValue); - reader = perThread.stringReader; + parent.stringReader.init(stringValue); + reader = parent.stringReader; } // Tokenize field and add to postingTable @@ -166,8 +167,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField { consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.length++; fieldState.position++; @@ -195,4 +197,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField { consumer.finish(); endConsumer.finish(); } + + @Override + FieldInfo getFieldInfo() { + return fieldInfo; + } } diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java deleted file mode 100644 index 2816519f9b2..00000000000 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -/** This is a DocFieldConsumer that inverts each field, - * separately, from a Document, and accepts a - * InvertedTermsConsumer to process those terms. 
*/ - -final class DocInverterPerThread extends DocFieldConsumerPerThread { - final DocInverter docInverter; - final InvertedDocConsumerPerThread consumer; - final InvertedDocEndConsumerPerThread endConsumer; - final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); - - static class SingleTokenAttributeSource extends AttributeSource { - final CharTermAttribute termAttribute; - final OffsetAttribute offsetAttribute; - - private SingleTokenAttributeSource() { - termAttribute = addAttribute(CharTermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - } - - public void reinit(String stringValue, int startOffset, int endOffset) { - termAttribute.setEmpty().append(stringValue); - offsetAttribute.setOffset(startOffset, endOffset); - } - } - - final DocumentsWriter.DocState docState; - - final FieldInvertState fieldState = new FieldInvertState(); - - // Used to read a string value for a field - final ReusableStringReader stringReader = new ReusableStringReader(); - - public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) { - this.docInverter = docInverter; - docState = docFieldProcessorPerThread.docState; - consumer = docInverter.consumer.addThread(this); - endConsumer = docInverter.endConsumer.addThread(this); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - endConsumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - // TODO: allow endConsumer.finishDocument to also return - // a DocWriter - endConsumer.finishDocument(); - return consumer.finishDocument(); - } - - @Override - void abort() { - try { - consumer.abort(); - } finally { - endConsumer.abort(); - } - } - - @Override - public DocFieldConsumerPerField addField(FieldInfo fi) { - return new DocInverterPerField(this, fi); - } -} diff --git a/lucene/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/src/java/org/apache/lucene/index/DocTermOrds.java new file mode 100644 index 00000000000..7bf10a8b06f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocTermOrds.java @@ -0,0 +1,801 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Comparator; + +/** + * This class enables fast access to multiple term ords for + * a specified field across all docIDs. + * + * Like FieldCache, it uninverts the index and holds a + * packed data structure in RAM to enable fast access. 
+ * Unlike FieldCache, it can handle multi-valued fields, + * and, it does not hold the term bytes in RAM. Rather, you + * must obtain a TermsEnum from the {@link #getOrdTermsEnum} + * method, and then seek-by-ord to get the term's bytes. + * + * While normally term ords are type long, in this API they are + * int as the internal representation here cannot address + * more than MAX_INT unique terms. Also, typically this + * class is used on fields with relatively few unique terms + * vs the number of documents. In addition, there is an + * internal limit (16 MB) on how many bytes each chunk of + * documents may consume. If you trip this limit you'll hit + * an IllegalStateException. + * + * Deleted documents are skipped during uninversion, and if + * you look them up you'll get 0 ords. + * + * The returned per-document ords do not retain their + * original order in the document. Instead they are returned + * in sorted (by ord, ie term's BytesRef comparator) order. They + * are also de-dup'd (ie if doc has same term more than once + * in this field, you'll only get that ord back once). + * + * This class tests whether the provided reader is able to + * retrieve terms by ord (ie, it's single segment, and it + * uses an ord-capable terms index). If not, this class + * will create its own term index internally, allowing to + * create a wrapped TermsEnum that can handle ord. The + * {@link #getOrdTermsEnum} method then provides this + * wrapped enum, if necessary. + * + * The RAM consumption of this class can be high! + * + * @lucene.experimental + */ + +/* + * Final form of the un-inverted field: + * Each document points to a list of term numbers that are contained in that document. + * + * Term numbers are in sorted order, and are encoded as variable-length deltas from the + * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A + * term number of 0 signals the end of the termNumber list. + * + * There is a single int[maxDoc()] which either contains a pointer into a byte[] for + * the termNumber lists, or directly contains the termNumber list if it fits in the 4 + * bytes of an integer. If the first byte in the integer is 1, the next 3 bytes + * are a pointer into a byte[] where the termNumber list starts. + * + * There are actually 256 byte arrays, to compensate for the fact that the pointers + * into the byte arrays are only 3 bytes long. The correct byte array for a document + * is a function of it's id. + * + * To save space and speed up faceting, any term that matches enough documents will + * not be un-inverted... it will be skipped while building the un-inverted field structure, + * and will use a set intersection method during faceting. + * + * To further save memory, the terms (the actual string values) are not all stored in + * memory, but a TermIndex is used to convert term numbers to term values only + * for the terms needed after faceting has completed. Only every 128th term value + * is stored, along with it's corresponding term number, and this is used as an + * index to find the closest term and iterate until the desired number is hit (very + * much like Lucene's own internal term index). 
+ * + */ + +public class DocTermOrds { + + // Term ords are shifted by this, internally, to reseve + // values 0 (end term) and 1 (index is a pointer into byte array) + private final static int TNUM_OFFSET = 2; + + // Default: every 128th term is indexed + public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing + + private int indexIntervalBits; + private int indexIntervalMask; + private int indexInterval; + + protected final int maxTermDocFreq; + + protected final String field; + + protected int numTermsInField; + protected long termInstances; // total number of references to term numbers + private long memsz; + protected int total_time; // total time to uninvert the field + protected int phase1_time; // time for phase1 of the uninvert process + + protected int[] index; + protected byte[][] tnums = new byte[256][]; + protected long sizeOfIndexedStrings; + protected BytesRef[] indexedTermsArray; + protected BytesRef prefix; + protected int ordBase; + + protected DocsEnum docsEnum; //used while uninverting + + public long ramUsedInBytes() { + // can cache the mem size since it shouldn't change + if (memsz!=0) return memsz; + long sz = 8*8 + 32; // local fields + if (index != null) sz += index.length * 4; + if (tnums!=null) { + for (byte[] arr : tnums) + if (arr != null) sz += arr.length; + } + memsz = sz; + return sz; + } + + /** Inverts all terms */ + public DocTermOrds(IndexReader reader, String field) throws IOException { + this(reader, field, null, Integer.MAX_VALUE); + } + + /** Inverts only terms starting w/ prefix */ + public DocTermOrds(IndexReader reader, String field, BytesRef termPrefix) throws IOException { + this(reader, field, termPrefix, Integer.MAX_VALUE); + } + + /** Inverts only terms starting w/ prefix, and only terms + * whose docFreq (not taking deletions into account) is + * <= maxTermDocFreq */ + public DocTermOrds(IndexReader reader, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException { + this(reader, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS); + uninvert(reader, termPrefix); + } + + /** Inverts only terms starting w/ prefix, and only terms + * whose docFreq (not taking deletions into account) is + * <= maxTermDocFreq, with a custom indexing interval + * (default is every 128nd term). */ + public DocTermOrds(IndexReader reader, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException { + this(field, maxTermDocFreq, indexIntervalBits); + uninvert(reader, termPrefix); + } + + /** Subclass inits w/ this, but be sure you then call + * uninvert, only once */ + protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) throws IOException { + //System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq); + this.field = field; + this.maxTermDocFreq = maxTermDocFreq; + this.indexIntervalBits = indexIntervalBits; + indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits); + indexInterval = 1 << indexIntervalBits; + } + + /** Returns a TermsEnum that implements ord. If the + * provided reader supports ord, we just return its + * TermsEnum; if it does not, we build a "private" terms + * index internally (WARNING: consumes RAM) and use that + * index to implement ord. This also enables ord on top + * of a composite reader. The returned TermsEnum is + * unpositioned. This returns null if there are no terms. + * + *

NOTE: you must pass the same reader that was + * used when creating this class */ + public TermsEnum getOrdTermsEnum(IndexReader reader) throws IOException { + if (termInstances == 0) { + return null; + } + if (indexedTermsArray == null) { + //System.out.println("GET normal enum"); + final Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + return terms.iterator(); + } else { + return null; + } + } else { + //System.out.println("GET wrapped enum ordBase=" + ordBase); + return new OrdWrappedTermsEnum(reader); + } + } + + /** Subclass can override this */ + protected void visitTerm(TermsEnum te, int termNum) throws IOException { + } + + protected void setActualDocFreq(int termNum, int df) throws IOException { + } + + // Call this only once (if you subclass!) + protected void uninvert(final IndexReader reader, final BytesRef termPrefix) throws IOException { + //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); + final long startTime = System.currentTimeMillis(); + prefix = termPrefix == null ? null : new BytesRef(termPrefix); + + final int maxDoc = reader.maxDoc(); + final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number + final int[] lastTerm = new int[maxDoc]; // last term we saw for this document + final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) + + final Terms terms = MultiFields.getTerms(reader, field); + if (terms == null) { + // No terms + return; + } + + final TermsEnum te = terms.iterator(); + final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef(); + //System.out.println("seekStart=" + seekStart.utf8ToString()); + if (te.seek(seekStart) == TermsEnum.SeekStatus.END) { + // No terms match + return; + } + + // If we need our "term index wrapper", these will be + // init'd below: + List indexedTerms = null; + PagedBytes indexedTermsBytes = null; + + boolean testedOrd = false; + + final Bits delDocs = MultiFields.getDeletedDocs(reader); + + // we need a minimum of 9 bytes, but round up to 12 since the space would + // be wasted with most allocators anyway. + byte[] tempArr = new byte[12]; + + // + // enumerate all terms, and build an intermediate form of the un-inverted field. + // + // During this intermediate form, every document has a (potential) byte[] + // and the int[maxDoc()] array either contains the termNumber list directly + // or the *end* offset of the termNumber list in it's byte array (for faster + // appending and faster creation of the final form). + // + // idea... if things are too large while building, we could do a range of docs + // at a time (but it would be a fair amount slower to build) + // could also do ranges in parallel to take advantage of multiple CPUs + + // OPTIONAL: remap the largest df terms to the lowest 128 (single byte) + // values. This requires going over the field first to find the most + // frequent terms ahead of time. 
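
The comment block above describes the intermediate form concretely: each document's sorted term numbers are stored as deltas from the previous term number, each delta is shifted up by TNUM_OFFSET (2) so that a stored 0 can terminate the list, and every value is written as a most-significant-group-first vInt whose continuation bit means "more bytes follow", matching the writeInt helper and TermOrdsIterator.read further down in this file. A self-contained sketch of that encoding, assuming the ords are already sorted, de-duplicated, and non-negative:

```java
import java.util.ArrayList;
import java.util.List;

// Sketch of the per-document term-number list encoding described above:
// sorted ords -> (delta + TNUM_OFFSET) values -> MSB-first vInts, 0-terminated.
final class TermNumListCodec {
  static final int TNUM_OFFSET = 2;

  static byte[] encode(int[] sortedOrds) {
    List<Byte> out = new ArrayList<>();
    int prev = 0;
    for (int ord : sortedOrds) {
      writeVInt((ord - prev) + TNUM_OFFSET, out);
      prev = ord;
    }
    writeVInt(0, out);                       // 0 marks the end of the list
    byte[] bytes = new byte[out.size()];
    for (int i = 0; i < bytes.length; i++) bytes[i] = out.get(i);
    return bytes;
  }

  static List<Integer> decode(byte[] arr) {
    List<Integer> ords = new ArrayList<>();
    int pos = 0, tnum = 0;
    for (;;) {
      int delta = 0;
      byte b;
      do {                                   // MSB-first vInt: high bit = "more bytes"
        b = arr[pos++];
        delta = (delta << 7) | (b & 0x7f);
      } while ((b & 0x80) != 0);
      if (delta == 0) break;                 // terminator
      tnum += delta - TNUM_OFFSET;
      ords.add(tnum);
    }
    return ords;
  }

  private static void writeVInt(int x, List<Byte> out) {
    for (int shift = 28; shift > 0; shift -= 7) {
      int group = x >>> shift;
      if (group != 0) {                      // only leading zero groups are skipped
        out.add((byte) (group | 0x80));
      }
    }
    out.add((byte) (x & 0x7f));              // last byte: continuation bit clear
  }
}
```
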
+ + int termNum = 0; + docsEnum = null; + + // Loop begins with te positioned to first term (we call + // seek above): + for (;;) { + final BytesRef t = te.term(); + if (t == null || (termPrefix != null && !t.startsWith(termPrefix))) { + break; + } + //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum); + + if (!testedOrd) { + try { + ordBase = (int) te.ord(); + //System.out.println("got ordBase=" + ordBase); + } catch (UnsupportedOperationException uoe) { + // Reader cannot provide ord support, so we wrap + // our own support by creating our own terms index: + indexedTerms = new ArrayList(); + indexedTermsBytes = new PagedBytes(15); + //System.out.println("NO ORDS"); + } + testedOrd = true; + } + + visitTerm(te, termNum); + + if (indexedTerms != null && (termNum & indexIntervalMask) == 0) { + // Index this term + sizeOfIndexedStrings += t.length; + BytesRef indexedTerm = new BytesRef(); + indexedTermsBytes.copy(t, indexedTerm); + // TODO: really should 1) strip off useless suffix, + // and 2) use FST not array/PagedBytes + indexedTerms.add(indexedTerm); + } + + final int df = te.docFreq(); + if (df <= maxTermDocFreq) { + + docsEnum = te.docs(delDocs, docsEnum); + + final DocsEnum.BulkReadResult bulkResult = docsEnum.getBulkResult(); + + // dF, but takes deletions into account + int actualDF = 0; + + for (;;) { + int chunk = docsEnum.read(); + if (chunk <= 0) { + break; + } + //System.out.println(" chunk=" + chunk + " docs"); + + actualDF += chunk; + + for (int i=0; i>>=8; + } + // point at the end index in the byte[] + index[doc] = (endPos<<8) | 1; + bytes[doc] = tempArr; + tempArr = new byte[12]; + } + } + } + } + setActualDocFreq(termNum, actualDF); + } + + termNum++; + if (te.next() == null) { + break; + } + } + + numTermsInField = termNum; + + long midPoint = System.currentTimeMillis(); + + if (termInstances == 0) { + // we didn't invert anything + // lower memory consumption. + tnums = null; + } else { + + this.index = index; + + // + // transform intermediate form into the final form, building a single byte[] + // at a time, and releasing the intermediate byte[]s as we go to avoid + // increasing the memory footprint. + // + + for (int pass = 0; pass<256; pass++) { + byte[] target = tnums[pass]; + int pos=0; // end in target; + if (target != null) { + pos = target.length; + } else { + target = new byte[4096]; + } + + // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx + // where pp is the pass (which array we are building), and xx is all values. + // each pass shares the same byte[] for termNumber lists. + for (int docbase = pass<<16; docbase maxDoc) + break; + } + + if (indexedTerms != null) { + indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]); + } + } + + long endTime = System.currentTimeMillis(); + + total_time = (int)(endTime-startTime); + phase1_time = (int)(midPoint-startTime); + } + + /** Number of bytes to represent an unsigned int as a vint. 
*/ + private static int vIntSize(int x) { + if ((x & (0xffffffff << (7*1))) == 0 ) { + return 1; + } + if ((x & (0xffffffff << (7*2))) == 0 ) { + return 2; + } + if ((x & (0xffffffff << (7*3))) == 0 ) { + return 3; + } + if ((x & (0xffffffff << (7*4))) == 0 ) { + return 4; + } + return 5; + } + + // todo: if we know the size of the vInt already, we could do + // a single switch on the size + private static int writeInt(int x, byte[] arr, int pos) { + int a; + a = (x >>> (7*4)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*3)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*2)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*1)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + arr[pos++] = (byte)(x & 0x7f); + return pos; + } + + public class TermOrdsIterator { + private int tnum; + private int upto; + private byte[] arr; + + /** Buffer must be at least 5 ints long. Returns number + * of term ords placed into buffer; if this count is + * less than buffer.length then that is the end. */ + public int read(int[] buffer) { + int bufferUpto = 0; + if (arr == null) { + // code is inlined into upto + //System.out.println("inlined"); + int code = upto; + int delta = 0; + for (;;) { + delta = (delta << 7) | (code & 0x7f); + if ((code & 0x80)==0) { + if (delta==0) break; + tnum += delta - TNUM_OFFSET; + buffer[bufferUpto++] = ordBase+tnum; + //System.out.println(" tnum=" + tnum); + delta = 0; + } + code >>>= 8; + } + } else { + // code is a pointer + for(;;) { + int delta = 0; + for(;;) { + byte b = arr[upto++]; + delta = (delta << 7) | (b & 0x7f); + //System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b); + if ((b & 0x80) == 0) break; + } + //System.out.println(" delta=" + delta); + if (delta == 0) break; + tnum += delta - TNUM_OFFSET; + //System.out.println(" tnum=" + tnum); + buffer[bufferUpto++] = ordBase+tnum; + if (bufferUpto == buffer.length) { + break; + } + } + } + + return bufferUpto; + } + + public TermOrdsIterator reset(int docID) { + //System.out.println(" reset docID=" + docID); + tnum = 0; + final int code = index[docID]; + if ((code & 0xff)==1) { + // a pointer + upto = code>>>8; + //System.out.println(" pointer! upto=" + upto); + int whichArray = (docID >>> 16) & 0xff; + arr = tnums[whichArray]; + } else { + //System.out.println(" inline!"); + arr = null; + upto = code; + } + return this; + } + } + + /** Returns an iterator to step through the term ords for + * this document. It's also possible to subclass this + * class and directly access members. */ + public TermOrdsIterator lookup(int doc, TermOrdsIterator reuse) { + final TermOrdsIterator ret; + if (reuse != null) { + ret = reuse; + } else { + ret = new TermOrdsIterator(); + } + return ret.reset(doc); + } + + /* Only used if original IndexReader doesn't implement + * ord; in this case we "wrap" our own terms index + * around it. 
*/ + private final class OrdWrappedTermsEnum extends TermsEnum { + private final IndexReader reader; + private final TermsEnum termsEnum; + private BytesRef term; + private long ord = -indexInterval-1; // force "real" seek + + public OrdWrappedTermsEnum(IndexReader reader) throws IOException { + this.reader = reader; + assert indexedTermsArray != null; + termsEnum = MultiFields.getTerms(reader, field).iterator(); + } + + @Override + public Comparator getComparator() throws IOException { + return termsEnum.getComparator(); + } + + @Override + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { + return termsEnum.docs(skipDocs, reuse); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + return termsEnum.docsAndPositions(skipDocs, reuse); + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public BytesRef next() throws IOException { + ord++; + if (termsEnum.next() == null) { + term = null; + return null; + } + return setTerm(); // this is extra work if we know we are in bounds... + } + + @Override + public int docFreq() throws IOException { + return termsEnum.docFreq(); + } + + @Override + public long totalTermFreq() throws IOException { + return termsEnum.totalTermFreq(); + } + + @Override + public long ord() throws IOException { + return ordBase + ord; + } + + @Override + public SeekStatus seek(BytesRef target, boolean useCache) throws IOException { + + // already here + if (term != null && term.equals(target)) { + return SeekStatus.FOUND; + } + + int startIdx = Arrays.binarySearch(indexedTermsArray, target); + + if (startIdx >= 0) { + // we hit the term exactly... lucky us! + TermsEnum.SeekStatus seekStatus = termsEnum.seek(target); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + ord = startIdx << indexIntervalBits; + setTerm(); + assert term != null; + return SeekStatus.FOUND; + } + + // we didn't hit the term exactly + startIdx = -startIdx-1; + + if (startIdx == 0) { + // our target occurs *before* the first term + TermsEnum.SeekStatus seekStatus = termsEnum.seek(target); + assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND; + ord = 0; + setTerm(); + assert term != null; + return SeekStatus.NOT_FOUND; + } + + // back up to the start of the block + startIdx--; + + if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) { + // we are already in the right block and the current term is before the term we want, + // so we don't need to seek. 
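+        // In detail: (ord >> indexIntervalBits) == startIdx means the wrapped enum
+        // is already positioned inside the block whose first indexed term is
+        // indexedTermsArray[startIdx] (one term is indexed every indexInterval
+        // terms), and term <= target, so the forward scan with next() below is
+        // sufficient; no seek on the underlying TermsEnum is required.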
+ } else { + // seek to the right block + TermsEnum.SeekStatus seekStatus = termsEnum.seek(indexedTermsArray[startIdx]); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + ord = startIdx << indexIntervalBits; + setTerm(); + assert term != null; // should be non-null since it's in the index + } + + while (term != null && term.compareTo(target) < 0) { + next(); + } + + if (term == null) { + return SeekStatus.END; + } else if (term.compareTo(target) == 0) { + return SeekStatus.FOUND; + } else { + return SeekStatus.NOT_FOUND; + } + } + + @Override + public SeekStatus seek(long targetOrd) throws IOException { + int delta = (int) (targetOrd - ordBase - ord); + //System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord); + if (delta < 0 || delta > indexInterval) { + final int idx = (int) (targetOrd >>> indexIntervalBits); + final BytesRef base = indexedTermsArray[idx]; + //System.out.println(" do seek term=" + base.utf8ToString()); + ord = idx << indexIntervalBits; + delta = (int) (targetOrd - ord); + final TermsEnum.SeekStatus seekStatus = termsEnum.seek(base, true); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + } else { + //System.out.println("seek w/in block"); + } + + while (--delta >= 0) { + BytesRef br = termsEnum.next(); + if (br == null) { + term = null; + return null; + } + ord++; + } + + setTerm(); + return term == null ? SeekStatus.END : SeekStatus.FOUND; + //System.out.println(" return term=" + term.utf8ToString()); + } + + private BytesRef setTerm() throws IOException { + term = termsEnum.term(); + //System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString())); + if (prefix != null && !term.startsWith(prefix)) { + term = null; + } + return term; + } + } + + public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException { + TermsEnum.SeekStatus status = termsEnum.seek(ord); + assert status == TermsEnum.SeekStatus.FOUND; + return termsEnum.term(); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index 4f81085a38e..5e316c21fee 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -19,36 +19,27 @@ package org.apache.lucene.index; import java.io.IOException; import java.io.PrintStream; -import java.text.NumberFormat; -import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.search.Query; import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMFile; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitVector; -import org.apache.lucene.util.RamUsageEstimator; -import 
org.apache.lucene.util.RecyclingByteBlockAllocator; -import org.apache.lucene.util.ThreadInterruptedException; - -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; /** * This class accepts multiple added documents and directly - * writes a single segment file. It does this more - * efficiently than creating a single segment per document - * (with DocumentWriter) and doing standard merges on those - * segments. + * writes segment files. * * Each added document is passed to the {@link DocConsumer}, * which in turn processes the document and interacts with @@ -111,266 +102,117 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; */ final class DocumentsWriter { - final AtomicLong bytesUsed = new AtomicLong(0); - IndexWriter writer; Directory directory; - String segment; // Current segment we are working on - - private int nextDocID; // Next docID to be added - private int numDocs; // # of docs added, but not yet flushed - - // Max # ThreadState instances; if there are more threads - // than this they share ThreadStates - private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0]; - private final HashMap threadBindings = new HashMap(); - - boolean bufferIsFull; // True when it's time to write segment - private boolean aborting; // True if an abort is pending + private volatile boolean closed; PrintStream infoStream; SimilarityProvider similarityProvider; - // max # simultaneous threads; if there are more than - // this, they wait for others to finish first - private final int maxThreadStates; + List newFiles; - // TODO: cutover to BytesRefHash - // Deletes for our still-in-RAM (to be flushed next) segment - private BufferedDeletes pendingDeletes = new BufferedDeletes(false); - - static class DocState { - DocumentsWriter docWriter; - Analyzer analyzer; - PrintStream infoStream; - SimilarityProvider similarityProvider; - int docID; - Document doc; - String maxTermPrefix; + final IndexWriter indexWriter; - // Only called by asserts - public boolean testPoint(String name) { - return docWriter.writer.testPoint(name); - } + private AtomicInteger numDocsInRAM = new AtomicInteger(0); - public void clear() { - // don't hold onto doc nor analyzer, in case it is - // largish: - doc = null; - analyzer = null; - } - } + // TODO: cut over to BytesRefHash in BufferedDeletes + volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue(); + private final Queue ticketQueue = new LinkedList(); - /** Consumer returns this on each doc. This holds any - * state that must be flushed synchronized "in docID - * order". We gather these and flush them in order. */ - abstract static class DocWriter { - DocWriter next; - int docID; - abstract void finish() throws IOException; - abstract void abort(); - abstract long sizeInBytes(); + private Collection abortedFiles; // List of files that were written before last abort() - void setNext(DocWriter next) { - this.next = next; - } - } + final IndexingChain chain; - /** - * Create and return a new DocWriterBuffer. - */ - PerDocBuffer newPerDocBuffer() { - return new PerDocBuffer(); - } - - /** - * RAMFile buffer for DocWriters. - */ - class PerDocBuffer extends RAMFile { - - /** - * Allocate bytes used from shared pool. - */ - @Override - protected byte[] newBuffer(int size) { - assert size == PER_DOC_BLOCK_SIZE; - return perDocAllocator.getByteBlock(); - } - - /** - * Recycle the bytes used. 
- */ - synchronized void recycle() { - if (buffers.size() > 0) { - setLength(0); - - // Recycle the blocks - perDocAllocator.recycleByteBlocks(buffers); - buffers.clear(); - sizeInBytes = 0; - - assert numBuffers() == 0; - } - } - } - - /** - * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method - * which returns the DocConsumer that the DocumentsWriter calls to process the - * documents. - */ - abstract static class IndexingChain { - abstract DocConsumer getChain(DocumentsWriter documentsWriter); - } - - static final IndexingChain defaultIndexingChain = new IndexingChain() { - - @Override - DocConsumer getChain(DocumentsWriter documentsWriter) { - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // Build up indexing chain: - - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - /* - * nesting TermsHash instances here to allow the secondary (TermVectors) share the interned postings - * via a shared ByteBlockPool. See TermsHashPerField for details. - */ - final TermsHash termVectorsTermHash = new TermsHash(documentsWriter, false, termVectorsWriter, null); - final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, termVectorsTermHash); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - return new DocFieldProcessor(documentsWriter, docInverter); - } - }; - - final DocConsumer consumer; - - // How much RAM we can use before flushing. This is 0 if - // we are flushing by doc count instead. 
- - private final IndexWriterConfig config; - - private boolean closed; - private FieldInfos fieldInfos; - - private final BufferedDeletesStream bufferedDeletesStream; - private final IndexWriter.FlushControl flushControl; - - DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos, + final DocumentsWriterPerThreadPool perThreadPool; + final FlushPolicy flushPolicy; + final DocumentsWriterFlushControl flushControl; + final Healthiness healthiness; + DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers, BufferedDeletesStream bufferedDeletesStream) throws IOException { this.directory = directory; - this.writer = writer; + this.indexWriter = writer; this.similarityProvider = config.getSimilarityProvider(); - this.maxThreadStates = config.getMaxThreadStates(); - this.fieldInfos = fieldInfos; - this.bufferedDeletesStream = bufferedDeletesStream; - flushControl = writer.flushControl; - consumer = config.getIndexingChain().getChain(this); - this.config = config; + this.perThreadPool = config.getIndexerThreadPool(); + this.chain = config.getIndexingChain(); + this.perThreadPool.initialize(this, globalFieldNumbers, config); + final FlushPolicy configuredPolicy = config.getFlushPolicy(); + if (configuredPolicy == null) { + flushPolicy = new FlushByRamOrCountsPolicy(); + } else { + flushPolicy = configuredPolicy; + } + flushPolicy.init(this); + + healthiness = new Healthiness(); + final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024; + flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT); } - // Buffer a specific docID for deletion. Currently only - // used when we hit a exception when adding a document - synchronized void deleteDocID(int docIDUpto) { - pendingDeletes.addDocID(docIDUpto); - // NOTE: we do not trigger flush here. This is - // potentially a RAM leak, if you have an app that tries - // to add docs but every single doc always hits a - // non-aborting exception. Allowing a flush here gets - // very messy because we are only invoked when handling - // exceptions so to do this properly, while handling an - // exception we'd have to go off and flush new deletes - // which is risky (likely would hit some other - // confounding exception). - } - - boolean deleteQueries(Query... queries) { - final boolean doFlush = flushControl.waitUpdate(0, queries.length); - synchronized(this) { - for (Query query : queries) { - pendingDeletes.addQuery(query, numDocs); - } + synchronized void deleteQueries(final Query... queries) throws IOException { + deleteQueue.addDelete(queries); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - return doFlush; - } - - boolean deleteQuery(Query query) { - final boolean doFlush = flushControl.waitUpdate(0, 1); - synchronized(this) { - pendingDeletes.addQuery(query, numDocs); - } - return doFlush; - } - - boolean deleteTerms(Term... 
terms) { - final boolean doFlush = flushControl.waitUpdate(0, terms.length); - synchronized(this) { - for (Term term : terms) { - pendingDeletes.addTerm(term, numDocs); - } - } - return doFlush; } // TODO: we could check w/ FreqProxTermsWriter: if the // term doesn't exist, don't bother buffering into the // per-DWPT map (but still must go into the global map) - boolean deleteTerm(Term term, boolean skipWait) { - final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait); - synchronized(this) { - pendingDeletes.addTerm(term, numDocs); + synchronized void deleteTerms(final Term... terms) throws IOException { + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addDelete(terms); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - return doFlush; } - /** If non-null, various details of indexing are printed - * here. */ + DocumentsWriterDeleteQueue currentDeleteSession() { + return deleteQueue; + } + + private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException { + if (deleteQueue != null) { + synchronized (ticketQueue) { + // Freeze and insert the delete flush ticket in the queue + ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false)); + applyFlushTickets(); + } + } + indexWriter.applyAllDeletes(); + indexWriter.flushCount.incrementAndGet(); + } + synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; - for(int i=0;i it = perThreadPool.getAllPerThreadsIterator(); + while (it.hasNext()) { + it.next().perThread.docState.infoStream = infoStream; } } - /** Get current segment name we are writing. */ - synchronized String getSegment() { - return segment; - } - /** Returns how many docs are currently buffered in RAM. */ - synchronized int getNumDocs() { - return numDocs; + int getNumDocs() { + return numDocsInRAM.get(); } - void message(String message) { - if (infoStream != null) { - writer.message("DW: " + message); - } + Collection abortedFiles() { + return abortedFiles; } - synchronized void setAborting() { + // returns boolean for asserts + boolean message(String message) { if (infoStream != null) { - message("setAborting"); + indexWriter.message("DW: " + message); + } + return true; + } + + private void ensureOpen() throws AlreadyClosedException { + if (closed) { + throw new AlreadyClosedException("this IndexWriter is closed"); } - aborting = true; } /** Called if we hit an exception at a bad time (when @@ -378,820 +220,335 @@ final class DocumentsWriter { * currently buffered docs. This resets our state, * discarding any docs added since last flush. 
*/ synchronized void abort() throws IOException { - if (infoStream != null) { - message("docWriter: abort"); - } - boolean success = false; + synchronized (this) { + deleteQueue.clear(); + } + try { - - // Forcefully remove waiting ThreadStates from line - waitQueue.abort(); - - // Wait for all other threads to finish with - // DocumentsWriter: - waitIdle(); - if (infoStream != null) { - message("docWriter: abort waitIdle done"); + message("docWriter: abort"); } - assert 0 == waitQueue.numWaiting: "waitQueue.numWaiting=" + waitQueue.numWaiting; + final Iterator threadsIterator = perThreadPool.getActivePerThreadsIterator(); - waitQueue.waitingBytes = 0; - - pendingDeletes.clear(); - - for (DocumentsWriterThreadState threadState : threadStates) + while (threadsIterator.hasNext()) { + ThreadState perThread = threadsIterator.next(); + perThread.lock(); try { - threadState.consumer.abort(); - } catch (Throwable t) { + if (perThread.isActive()) { // we might be closed + perThread.perThread.abort(); + perThread.perThread.checkAndResetHasAborted(); + } else { + assert closed; + } + } finally { + perThread.unlock(); } - - try { - consumer.abort(); - } catch (Throwable t) { } - // Reset all postings data - doAfterFlush(); success = true; } finally { - aborting = false; - notifyAll(); if (infoStream != null) { - message("docWriter: done abort; success=" + success); + message("docWriter: done abort; abortedFiles=" + abortedFiles + " success=" + success); } } } - /** Reset after a flush */ - private void doAfterFlush() throws IOException { - // All ThreadStates should be idle when we are called - assert allThreadsIdle(); - for (DocumentsWriterThreadState threadState : threadStates) { - threadState.consumer.doAfterFlush(); - } - - threadBindings.clear(); - waitQueue.reset(); - segment = null; - fieldInfos = new FieldInfos(fieldInfos); - numDocs = 0; - nextDocID = 0; - bufferIsFull = false; - for(int i=0;i BD - final long delGen = bufferedDeletesStream.getNextGen(); - if (pendingDeletes.any()) { - if (segmentInfos.size() > 0 || newSegment != null) { - final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen); - if (infoStream != null) { - message("flush: push buffered deletes startSize=" + pendingDeletes.bytesUsed.get() + " frozenSize=" + packet.bytesUsed); - } - bufferedDeletesStream.push(packet); - if (infoStream != null) { - message("flush: delGen=" + packet.gen); - } - if (newSegment != null) { - newSegment.setBufferedDeletesGen(packet.gen); - } - } else { - if (infoStream != null) { - message("flush: drop buffered deletes: no segments"); - } - // We can safely discard these deletes: since - // there are no segments, the deletions cannot - // affect anything. - } - pendingDeletes.clear(); - } else if (newSegment != null) { - newSegment.setBufferedDeletesGen(delGen); - } + //for testing + public int getNumBufferedDeleteTerms() { + return deleteQueue.numGlobalTermDeletes(); } public boolean anyDeletions() { - return pendingDeletes.any(); + return deleteQueue.anyChanges(); } - /** Flush all pending docs to a new segment */ - // Lock order: IW -> DW - synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException { - - final long startTime = System.currentTimeMillis(); - - // We change writer's segmentInfos: - assert Thread.holdsLock(writer); - - waitIdle(); - - if (numDocs == 0) { - // nothing to do! 
- if (infoStream != null) { - message("flush: no docs; skipping"); - } - // Lock order: IW -> DW -> BD - pushDeletes(null, segmentInfos); - return null; - } - - if (aborting) { - if (infoStream != null) { - message("flush: skip because aborting is set"); - } - return null; - } - - boolean success = false; - - SegmentInfo newSegment; - - try { - assert nextDocID == numDocs; - assert waitQueue.numWaiting == 0; - assert waitQueue.waitingBytes == 0; - - if (infoStream != null) { - message("flush postings as segment " + segment + " numDocs=" + numDocs); - } - - final SegmentWriteState flushState = segWriteState(true); - // Apply delete-by-docID now (delete-byDocID only - // happens when an exception is hit processing that - // doc, eg if analyzer has some problem w/ the text): - if (pendingDeletes.docIDs.size() > 0) { - flushState.deletedDocs = new BitVector(numDocs); - for(int delDocID : pendingDeletes.docIDs) { - flushState.deletedDocs.set(delDocID); - } - pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); - pendingDeletes.docIDs.clear(); - } - - newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); - - Collection threads = new HashSet(); - for (DocumentsWriterThreadState threadState : threadStates) { - threads.add(threadState.consumer); - } - - double startMBUsed = bytesUsed()/1024./1024.; - - consumer.flush(threads, flushState); - - newSegment.setHasVectors(flushState.hasVectors); - - if (infoStream != null) { - message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors")); - if (flushState.deletedDocs != null) { - message("new segment has " + flushState.deletedDocs.count() + " deleted docs"); - } - message("flushedFiles=" + newSegment.files()); - message("flushed codecs=" + newSegment.getSegmentCodecs()); - } - - if (mergePolicy.useCompoundFile(segmentInfos, newSegment)) { - final String cfsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION); - - if (infoStream != null) { - message("flush: create compound file \"" + cfsFileName + "\""); - } - - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName); - for(String fileName : newSegment.files()) { - cfsWriter.addFile(fileName); - } - cfsWriter.close(); - deleter.deleteNewFiles(newSegment.files()); - newSegment.setUseCompoundFile(true); - } - - // Must write deleted docs after the CFS so we don't - // slurp the del file into CFS: - if (flushState.deletedDocs != null) { - final int delCount = flushState.deletedDocs.count(); - assert delCount > 0; - newSegment.setDelCount(delCount); - newSegment.advanceDelGen(); - final String delFileName = newSegment.getDelFileName(); - if (infoStream != null) { - message("flush: write " + delCount + " deletes to " + delFileName); - } - boolean success2 = false; - try { - // TODO: in the NRT case it'd be better to hand - // this del vector over to the - // shortly-to-be-opened SegmentReader and let it - // carry the changes; there's no reason to use - // filesystem as intermediary here. 
- flushState.deletedDocs.write(directory, delFileName); - success2 = true; - } finally { - if (!success2) { - try { - directory.deleteFile(delFileName); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } - } - } - } - - if (infoStream != null) { - message("flush: segment=" + newSegment); - final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; - final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; - message(" ramUsed=" + nf.format(startMBUsed) + " MB" + - " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + - " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + - " docs/MB=" + nf.format(numDocs / newSegmentSize) + - " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); - } - - success = true; - } finally { - notifyAll(); - if (!success) { - if (segment != null) { - deleter.refresh(segment); - } - abort(); - } - } - - doAfterFlush(); - - // Lock order: IW -> DW -> BD - pushDeletes(newSegment, segmentInfos); - if (infoStream != null) { - message("flush time " + (System.currentTimeMillis()-startTime) + " msec"); - } - - return newSegment; - } - - SegmentWriteState segWriteState(boolean flush) { - return new SegmentWriteState(infoStream, directory, segment, fieldInfos, - numDocs, writer.getConfig().getTermIndexInterval(), - fieldInfos.buildSegmentCodecs(flush), - pendingDeletes, bytesUsed); - } - - synchronized void close() { + void close() { closed = true; - notifyAll(); + flushControl.setClosed(); } - /** Returns a free (idle) ThreadState that may be used for - * indexing this one document. This call also pauses if a - * flush is pending. If delTerm is non-null then we - * buffer this deleted term after the thread state has - * been acquired. */ - synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException { + boolean updateDocument(final Document doc, final Analyzer analyzer, + final Term delTerm) throws CorruptIndexException, IOException { + ensureOpen(); + boolean maybeMerge = false; + final boolean isUpdate = delTerm != null; + if (healthiness.anyStalledThreads()) { - final Thread currentThread = Thread.currentThread(); - assert !Thread.holdsLock(writer); + // Help out flushing any pending DWPTs so we can un-stall: + if (infoStream != null) { + message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)"); + } - // First, find a thread state. If this thread already - // has affinity to a specific ThreadState, use that one - // again. - DocumentsWriterThreadState state = threadBindings.get(currentThread); - if (state == null) { - - // First time this thread has called us since last - // flush. 
Find the least loaded thread state: - DocumentsWriterThreadState minThreadState = null; - for(int i=0;i= maxThreadStates)) { - state = minThreadState; - state.numThreads++; - } else { - // Just create a new "private" thread state - DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length]; - if (threadStates.length > 0) { - System.arraycopy(threadStates, 0, newArray, 0, threadStates.length); - } - state = newArray[threadStates.length] = new DocumentsWriterThreadState(this); - threadStates = newArray; - } - threadBindings.put(currentThread, state); - } - // Next, wait until my thread state is idle (in case - // it's shared with other threads), and no flush/abort - // pending - waitReady(state); - - // Allocate segment name if this is the first doc since - // last flush: - if (segment == null) { - segment = writer.newSegmentName(); - assert numDocs == 0; - } - - state.docState.docID = nextDocID++; - - if (delTerm != null) { - pendingDeletes.addTerm(delTerm, state.docState.docID); - } - - numDocs++; - state.isIdle = false; - return state; - } - - boolean addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, null); - } - - boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) - throws CorruptIndexException, IOException { - - // Possibly trigger a flush, or wait until any running flush completes: - boolean doFlush = flushControl.waitUpdate(1, delTerm != null ? 1 : 0); - - // This call is synchronized but fast - final DocumentsWriterThreadState state = getThreadState(doc, delTerm); - - final DocState docState = state.docState; - docState.doc = doc; - docState.analyzer = analyzer; - - boolean success = false; - try { - // This call is not synchronized and does all the - // work - final DocWriter perDoc; - try { - perDoc = state.consumer.processDocument(fieldInfos); - } finally { - docState.clear(); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter still has stalled threads; waiting"); } - // This call is synchronized but fast - finishDocument(state, perDoc); + healthiness.waitIfStalled(); // block if stalled - success = true; - } finally { - if (!success) { - - // If this thread state had decided to flush, we - // must clear it so another thread can flush - if (doFlush) { - flushControl.clearFlushPending(); - } - - if (infoStream != null) { - message("exception in updateDocument aborting=" + aborting); - } - - synchronized(this) { - - state.isIdle = true; - notifyAll(); - - if (aborting) { - abort(); - } else { - skipDocWriter.docID = docState.docID; - boolean success2 = false; - try { - waitQueue.add(skipDocWriter); - success2 = true; - } finally { - if (!success2) { - abort(); - return false; - } - } - - // Immediately mark this document as deleted - // since likely it was partially added. 
This - // keeps indexing as "all or none" (atomic) when - // adding a document: - deleteDocID(state.docState.docID); - } - } + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter done waiting"); } } - doFlush |= flushControl.flushByRAMUsage("new document"); - - return doFlush; - } - - public synchronized void waitIdle() { - while (!allThreadsIdle()) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - - synchronized void waitReady(DocumentsWriterThreadState state) { - while (!closed && (!state.isIdle || aborting)) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - if (closed) { - throw new AlreadyClosedException("this IndexWriter is closed"); - } - } - - /** Does the synchronized work to finish/flush the - * inverted document. */ - private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException { - - // Must call this w/o holding synchronized(this) else - // we'll hit deadlock: - balanceRAM(); - - synchronized(this) { - - assert docWriter == null || docWriter.docID == perThread.docState.docID; - - if (aborting) { - - // We are currently aborting, and another thread is - // waiting for me to become idle. We just forcefully - // idle this threadState; it will be fully reset by - // abort() - if (docWriter != null) { - try { - docWriter.abort(); - } catch (Throwable t) { - } - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); - - return; - } - - final boolean doPause; - - if (docWriter != null) { - doPause = waitQueue.add(docWriter); - } else { - skipDocWriter.docID = perThread.docState.docID; - doPause = waitQueue.add(skipDocWriter); - } - - if (doPause) { - waitForWaitQueue(); - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); - } - } - - synchronized void waitForWaitQueue() { - do { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } while (!waitQueue.doResume()); - } - - private static class SkipDocWriter extends DocWriter { - @Override - void finish() { - } - @Override - void abort() { - } - @Override - long sizeInBytes() { - return 0; - } - } - final SkipDocWriter skipDocWriter = new SkipDocWriter(); - - NumberFormat nf = NumberFormat.getInstance(); - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - /* if you increase this, you must fix field cache impl for - * getTerms/getTermsIndex requires <= 32768. 
*/ - final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; - - /* Initial chunks size of the shared int[] blocks used to - store postings data */ - final static int INT_BLOCK_SHIFT = 13; - final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; - final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; - - private List freeIntBlocks = new ArrayList(); - - /* Allocate another int[] from the shared pool */ - synchronized int[] getIntBlock() { - final int size = freeIntBlocks.size(); - final int[] b; - if (0 == size) { - b = new int[INT_BLOCK_SIZE]; - bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); - } else { - b = freeIntBlocks.remove(size-1); - } - return b; - } - - long bytesUsed() { - return bytesUsed.get() + pendingDeletes.bytesUsed.get(); - } - - /* Return int[]s to the pool */ - synchronized void recycleIntBlocks(int[][] blocks, int start, int end) { - for(int i=start;i= ramBufferSize; + try { + + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; + } + + final DocumentsWriterPerThread dwpt = perThread.perThread; + try { + dwpt.updateDocument(doc, analyzer, delTerm); + numDocsInRAM.incrementAndGet(); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); + } + } + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); + } finally { + perThread.unlock(); } - - if (doBalance) { - - if (infoStream != null) { - message(" RAM: balance allocations: usedMB=" + toMB(bytesUsed()) + - " vs trigger=" + toMB(ramBufferSize) + - " deletesMB=" + toMB(deletesRAMUsed) + - " byteBlockFree=" + toMB(byteBlockAllocator.bytesUsed()) + - " perDocFree=" + toMB(perDocAllocator.bytesUsed())); + + if (flushingDWPT != null) { + maybeMerge |= doFlush(flushingDWPT); + } else { + final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush(); + if (nextPendingFlush != null) { + maybeMerge |= doFlush(nextPendingFlush); } + } + return maybeMerge; + } - final long startBytesUsed = bytesUsed() + deletesRAMUsed; - - int iter = 0; - - // We free equally from each pool in 32 KB - // chunks until we are below our threshold - // (freeLevel) - - boolean any = true; - - final long freeLevel = (long) (0.95 * ramBufferSize); - - while(bytesUsed()+deletesRAMUsed > freeLevel) { + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { + boolean maybeMerge = false; + while (flushingDWPT != null) { + maybeMerge = true; + boolean success = false; + FlushTicket ticket = null; - synchronized(this) { - if (0 == perDocAllocator.numBufferedBlocks() && - 0 == byteBlockAllocator.numBufferedBlocks() && - 0 == freeIntBlocks.size() && !any) { - // Nothing else to free -- must flush now. - bufferIsFull = bytesUsed()+deletesRAMUsed > ramBufferSize; - if (infoStream != null) { - if (bytesUsed()+deletesRAMUsed > ramBufferSize) { - message(" nothing to free; set bufferIsFull"); - } else { - message(" nothing to free"); - } + try { + assert currentFullFlushDelQueue == null + || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " + + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + + " " + flushControl.isFullFlush(); + /* + * Since with DWPT the flush process is concurrent and several DWPT + * could flush at the same time we must maintain the order of the + * flushes before we can apply the flushed segment and the frozen global + * deletes it is buffering. 
The reason for this is that the global + * deletes mark a certain point in time where we took a DWPT out of + * rotation and freeze the global deletes. + * + * Example: A flush 'A' starts and freezes the global deletes, then + * flush 'B' starts and freezes all deletes occurred since 'A' has + * started. if 'B' finishes before 'A' we need to wait until 'A' is done + * otherwise the deletes frozen by 'B' are not applied to 'A' and we + * might miss to deletes documents in 'A'. + */ + try { + synchronized (ticketQueue) { + // Each flush is assigned a ticket in the order they accquire the ticketQueue lock + ticket = new FlushTicket(flushingDWPT.prepareFlush(), true); + ticketQueue.add(ticket); + } + + // flush concurrently without locking + final FlushedSegment newSegment = flushingDWPT.flush(); + synchronized (ticketQueue) { + ticket.segment = newSegment; + } + // flush was successful once we reached this point - new seg. has been assigned to the ticket! + success = true; + } finally { + if (!success && ticket != null) { + synchronized (ticketQueue) { + // In the case of a failure make sure we are making progress and + // apply all the deletes since the segment flush failed since the flush + // ticket could hold global deletes see FlushTicket#canPublish() + ticket.isSegmentFlush = false; } - break; - } - - if ((0 == iter % 4) && byteBlockAllocator.numBufferedBlocks() > 0) { - byteBlockAllocator.freeBlocks(1); - } - if ((1 == iter % 4) && freeIntBlocks.size() > 0) { - freeIntBlocks.remove(freeIntBlocks.size()-1); - bytesUsed.addAndGet(-INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT); - } - if ((2 == iter % 4) && perDocAllocator.numBufferedBlocks() > 0) { - perDocAllocator.freeBlocks(32); // Remove upwards of 32 blocks (each block is 1K) } } - - if ((3 == iter % 4) && any) { - // Ask consumer to free any recycled state - any = consumer.freeRAM(); - } - - iter++; + /* + * Now we are done and try to flush the ticket queue if the head of the + * queue has already finished the flush. + */ + applyFlushTickets(); + } finally { + flushControl.doAfterFlush(flushingDWPT); + flushingDWPT.checkAndResetHasAborted(); + indexWriter.flushCount.incrementAndGet(); } + + flushingDWPT = flushControl.nextPendingFlush(); + } + return maybeMerge; + } - if (infoStream != null) { - message(" after free: freedMB=" + nf.format((startBytesUsed-bytesUsed()-deletesRAMUsed)/1024./1024.) 
+ " usedMB=" + nf.format((bytesUsed()+deletesRAMUsed)/1024./1024.)); + private void applyFlushTickets() throws IOException { + synchronized (ticketQueue) { + while (true) { + // Keep publishing eligible flushed segments: + final FlushTicket head = ticketQueue.peek(); + if (head != null && head.canPublish()) { + ticketQueue.poll(); + finishFlush(head.segment, head.frozenDeletes); + } else { + break; + } } } } - final WaitQueue waitQueue = new WaitQueue(); - - private class WaitQueue { - DocWriter[] waiting; - int nextWriteDocID; - int nextWriteLoc; - int numWaiting; - long waitingBytes; - - public WaitQueue() { - waiting = new DocWriter[10]; - } - - synchronized void reset() { - // NOTE: nextWriteLoc doesn't need to be reset - assert numWaiting == 0; - assert waitingBytes == 0; - nextWriteDocID = 0; - } - - synchronized boolean doResume() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueueResumeBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueueResumeBytes = 2*1024*1024; - } else { - waitQueueResumeBytes = (long) (mb*1024*1024*0.05); - } - return waitingBytes <= waitQueueResumeBytes; - } - - synchronized boolean doPause() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueuePauseBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueuePauseBytes = 4*1024*1024; - } else { - waitQueuePauseBytes = (long) (mb*1024*1024*0.1); - } - return waitingBytes > waitQueuePauseBytes; - } - - synchronized void abort() { - int count = 0; - for(int i=0;i BDS so that the {@link SegmentInfo}'s + * delete generation is always GlobalPacket_deleteGeneration + 1 + */ + private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket) + throws IOException { + assert newSegment != null; + final SegmentInfo segInfo = indexWriter.prepareFlushedSegment(newSegment); + final BufferedDeletes deletes = newSegment.segmentDeletes; + FrozenBufferedDeletes packet = null; + if (deletes != null && deletes.any()) { + // Segment private delete + packet = new FrozenBufferedDeletes(deletes, true); + if (infoStream != null) { + message("flush: push buffered seg private deletes: " + packet); } } - synchronized public boolean add(DocWriter doc) throws IOException { + // now publish! + indexWriter.publishFlushedSegment(segInfo, packet, globalPacket); + } + + // for asserts + private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; + // for asserts + private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + currentFullFlushDelQueue = session; + return true; + } + + /* + * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a + * two stage operation; the caller must ensure (in try/finally) that finishFlush + * is called after this method, to release the flush lock in DWFlushControl + */ + final boolean flushAllThreads() + throws IOException { + final DocumentsWriterDeleteQueue flushingDeleteQueue; - assert doc.docID >= nextWriteDocID; - - if (doc.docID == nextWriteDocID) { - writeDocument(doc); - while(true) { - doc = waiting[nextWriteLoc]; - if (doc != null) { - numWaiting--; - waiting[nextWriteLoc] = null; - waitingBytes -= doc.sizeInBytes(); - writeDocument(doc); - } else { - break; - } - } - } else { - - // I finished before documents that were added - // before me. This can easily happen when I am a - // small doc and the docs before me were large, or, - // just due to luck in the thread scheduling. 
Just - // add myself to the queue and when that large doc - // finishes, it will flush me: - int gap = doc.docID - nextWriteDocID; - if (gap >= waiting.length) { - // Grow queue - DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - assert nextWriteLoc >= 0; - System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc); - System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc.docID - nextWriteDocID; - } - - int loc = nextWriteLoc + gap; - if (loc >= waiting.length) { - loc -= waiting.length; - } - - // We should only wrap one time - assert loc < waiting.length; - - // Nobody should be in my spot! - assert waiting[loc] == null; - waiting[loc] = doc; - numWaiting++; - waitingBytes += doc.sizeInBytes(); + synchronized (this) { + flushingDeleteQueue = deleteQueue; + /* Cutover to a new delete queue. This must be synced on the flush control + * otherwise a new DWPT could sneak into the loop with an already flushing + * delete queue */ + flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl + assert setFlushingDeleteQueue(flushingDeleteQueue); + } + assert currentFullFlushDelQueue != null; + assert currentFullFlushDelQueue != deleteQueue; + + boolean anythingFlushed = false; + try { + DocumentsWriterPerThread flushingDWPT; + // Help out with flushing: + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + anythingFlushed |= doFlush(flushingDWPT); } - - return doPause(); + // If a concurrent flush is still in flight wait for it + while (flushControl.anyFlushing()) { + flushControl.waitForFlush(); + } + if (!anythingFlushed) { // apply deletes if we did not flush any document + synchronized (ticketQueue) { + ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false)); + } + applyFlushTickets(); + } + } finally { + assert flushingDeleteQueue == currentFullFlushDelQueue; + } + return anythingFlushed; + } + + final void finishFullFlush(boolean success) { + assert setFlushingDeleteQueue(null); + if (success) { + // Release the flush lock + flushControl.finishFullFlush(); + } else { + flushControl.abortFullFlushes(); + } + } + + static final class FlushTicket { + final FrozenBufferedDeletes frozenDeletes; + /* access to non-final members must be synchronized on DW#ticketQueue */ + FlushedSegment segment; + boolean isSegmentFlush; + + FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) { + this.frozenDeletes = frozenDeletes; + this.isSegmentFlush = isSegmentFlush; + } + + boolean canPublish() { + return (!isSegmentFlush || segment != null); } } } diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java new file mode 100644 index 00000000000..486c12659f7 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java @@ -0,0 +1,396 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.lucene.search.Query; + +/** + * {@link DocumentsWriterDeleteQueue} is a non-blocking linked pending deletes + * queue. In contrast to other queue implementations we only maintain the + * tail of the queue. A delete queue is always used in a context of a set of + * DWPTs and a global delete pool. Each of the DWPTs and the global pool needs to + * maintain its 'own' head of the queue (as a DeleteSlice instance per DWPT). + * The difference between the DWPT and the global pool is that the DWPT starts + * maintaining a head once it has added its first document, since for its segment's + * private deletes only the deletes after that document are relevant. The global + * pool instead starts maintaining the head once this instance is created by + * taking the sentinel instance as its initial head. + *
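// --- Illustrative sketch (editorial; hypothetical names, not part of this patch) ---
// The idea described above in miniature: a non-blocking, singly linked queue that only
// remembers its tail, while every consumer keeps its own head as a private "slice".
// Nodes that no slice can reach anymore are reclaimed by the garbage collector.
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

final class TailOnlyQueue<T> {

  static final class Node<T> {
    final T item;
    final AtomicReference<Node<T>> next = new AtomicReference<Node<T>>(null);
    Node(T item) { this.item = item; }
  }

  // Only the tail is kept; the initial sentinel's (null) item is never applied.
  private final AtomicReference<Node<T>> tail =
      new AtomicReference<Node<T>>(new Node<T>(null));

  void add(T item) {
    final Node<T> node = new Node<T>(item);
    while (true) {
      final Node<T> curTail = tail.get();
      final Node<T> tailNext = curTail.next.get();
      if (tailNext != null) {
        // Intermediate state: another thread appended but has not swung the tail yet.
        // Help advance the tail and retry.
        tail.compareAndSet(curTail, tailNext);
      } else if (curTail.next.compareAndSet(null, node)) {
        // Successfully linked; swinging the tail may fail if another thread helped already.
        tail.compareAndSet(curTail, node);
        return;
      }
    }
  }

  // A slice is a (head, tail] window owned by one consumer; the head item is never applied.
  final class Slice {
    Node<T> sliceHead = tail.get();
    Node<T> sliceTail = sliceHead;

    List<T> drainToCurrentTail() {
      sliceTail = tail.get();                 // catch up to everything published so far
      final List<T> drained = new ArrayList<T>();
      for (Node<T> n = sliceHead; n != sliceTail; ) {
        n = n.next.get();                     // fully linked up to sliceTail, so never null
        drained.add(n.item);
      }
      sliceHead = sliceTail;                  // reset to a zero-length slice
      return drained;
    }
  }
}
// Usage: each writer creates queue.new Slice() when it starts buffering documents and
// periodically drains it into its private delete buffer.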

+ * Since each {@link DeleteSlice} maintains its own head and the list is only + * singly linked, the garbage collector takes care of pruning the list for us. + * All nodes in the list that are still relevant should be either directly or + * indirectly referenced by one of the DWPT's private {@link DeleteSlice} or by + * the global {@link BufferedDeletes} slice. + *

+ * Each DWPT, as well as the global delete pool, maintains its own private + * DeleteSlice instance. In the DWPT case, updating a slice is equivalent to + * atomically finishing the document. The slice update guarantees a "happens + * before" relationship to all other updates in the same indexing session. When a + * DWPT updates a document it: + * + *

 + * 1. consumes a document and finishes its processing
 + * 2. updates its private {@link DeleteSlice} either by calling
 + *    {@link #updateSlice(DeleteSlice)} or {@link #add(Term, DeleteSlice)} (if the
 + *    document has a delTerm)
 + * 3. applies all deletes in the slice to its private {@link BufferedDeletes}
 + *    and resets it
 + * 4. increments its internal document id
 + * (a standalone sketch of this sequence follows below)
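// --- Illustrative sketch (editorial; hypothetical names, not part of this patch) ---
// The sequence listed above, written against the TailOnlyQueue sketch shown earlier.
// The delete term is published to the shared queue and folded into the writer's private
// buffer before the document counter is bumped, so the private deletes always cover
// exactly the documents this writer has finished. Simplification: the real code pins the
// slice tail to the delete term's own node rather than the current queue tail.
import java.util.ArrayList;
import java.util.List;

final class WriterSketch {
  private final TailOnlyQueue<String> deleteQueue;        // shared by all writers
  private final TailOnlyQueue<String>.Slice deleteSlice;  // this writer's private head
  private final List<String> privateDeletes = new ArrayList<String>();
  private int numDocsInRAM;

  WriterSketch(TailOnlyQueue<String> sharedQueue) {
    this.deleteQueue = sharedQueue;
    this.deleteSlice = sharedQueue.new Slice();
  }

  void finishDocument(String delTerm) {
    if (delTerm != null) {
      deleteQueue.add(delTerm);                            // publish the delete globally
    }
    // advance the private slice and apply the deletes it now covers (steps 2 and 3)
    privateDeletes.addAll(deleteSlice.drainToCurrentTail());
    numDocsInRAM++;                                        // step 4: only now count the doc
  }

  int getNumDocsInRAM() { return numDocsInRAM; }
}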
+ * + * The DWPT also doesn't apply its current documents delete term until it has + * updated its delete slice which ensures the consistency of the update. If the + * update fails before the DeleteSlice could have been updated the deleteTerm + * will also not be added to its private deletes neither to the global deletes. + * + */ +final class DocumentsWriterDeleteQueue { + + private volatile Node tail; + + private static final AtomicReferenceFieldUpdater tailUpdater = AtomicReferenceFieldUpdater + .newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail"); + + private final DeleteSlice globalSlice; + private final BufferedDeletes globalBufferedDeletes; + /* only acquired to update the global deletes */ + private final ReentrantLock globalBufferLock = new ReentrantLock(); + + final long generation; + + DocumentsWriterDeleteQueue() { + this(0); + } + + DocumentsWriterDeleteQueue(long generation) { + this(new BufferedDeletes(false), generation); + } + + DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) { + this.globalBufferedDeletes = globalBufferedDeletes; + this.generation = generation; + /* + * we use a sentinel instance as our initial tail. No slice will ever try to + * apply this tail since the head is always omitted. + */ + tail = new Node(null); // sentinel + globalSlice = new DeleteSlice(tail); + } + + void addDelete(Query... queries) { + add(new QueryArrayNode(queries)); + tryApplyGlobalSlice(); + } + + void addDelete(Term... terms) { + add(new TermArrayNode(terms)); + tryApplyGlobalSlice(); + } + + /** + * invariant for document update + */ + void add(Term term, DeleteSlice slice) { + final TermNode termNode = new TermNode(term); + add(termNode); + /* + * this is an update request where the term is the updated documents + * delTerm. in that case we need to guarantee that this insert is atomic + * with regards to the given delete slice. This means if two threads try to + * update the same document with in turn the same delTerm one of them must + * win. By taking the node we have created for our del term as the new tail + * it is guaranteed that if another thread adds the same right after us we + * will apply this delete next time we update our slice and one of the two + * competing updates wins! + */ + slice.sliceTail = termNode; + assert slice.sliceHead != slice.sliceTail : "slice head and tail must differ after add"; + tryApplyGlobalSlice(); // TODO doing this each time is not necessary maybe + // we can do it just every n times or so? + } + + void add(Node item) { + /* + * this non-blocking / 'wait-free' linked list add was inspired by Apache + * Harmony's ConcurrentLinkedQueue Implementation. + */ + while (true) { + final Node currentTail = this.tail; + final Node tailNext = currentTail.next; + if (tail == currentTail) { + if (tailNext != null) { + /* + * we are in intermediate state here. the tails next pointer has been + * advanced but the tail itself might not be updated yet. help to + * advance the tail and try again updating it. 
+ */ + tailUpdater.compareAndSet(this, currentTail, tailNext); // can fail + } else { + /* + * we are in quiescent state and can try to insert the item to the + * current tail if we fail to insert we just retry the operation since + * somebody else has already added its item + */ + if (currentTail.casNext(null, item)) { + /* + * now that we are done we need to advance the tail while another + * thread could have advanced it already so we can ignore the return + * type of this CAS call + */ + tailUpdater.compareAndSet(this, currentTail, item); + return; + } + } + } + } + } + + boolean anyChanges() { + globalBufferLock.lock(); + try { + return !globalSlice.isEmpty() || globalBufferedDeletes.any(); + } finally { + globalBufferLock.unlock(); + } + } + + void tryApplyGlobalSlice() { + if (globalBufferLock.tryLock()) { + /* + * The global buffer must be locked but we don't need to upate them if + * there is an update going on right now. It is sufficient to apply the + * deletes that have been added after the current in-flight global slices + * tail the next time we can get the lock! + */ + try { + if (updateSlice(globalSlice)) { + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + } finally { + globalBufferLock.unlock(); + } + } + } + + FrozenBufferedDeletes freezeGlobalBuffer(DeleteSlice callerSlice) { + globalBufferLock.lock(); + /* + * Here we freeze the global buffer so we need to lock it, apply all + * deletes in the queue and reset the global slice to let the GC prune the + * queue. + */ + final Node currentTail = tail; // take the current tail make this local any + // Changes after this call are applied later + // and not relevant here + if (callerSlice != null) { + // Update the callers slices so we are on the same page + callerSlice.sliceTail = currentTail; + } + try { + if (globalSlice.sliceTail != currentTail) { + globalSlice.sliceTail = currentTail; + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + + final FrozenBufferedDeletes packet = new FrozenBufferedDeletes( + globalBufferedDeletes, false); + globalBufferedDeletes.clear(); + return packet; + } finally { + globalBufferLock.unlock(); + } + } + + DeleteSlice newSlice() { + return new DeleteSlice(tail); + } + + boolean updateSlice(DeleteSlice slice) { + if (slice.sliceTail != tail) { // If we are the same just + slice.sliceTail = tail; + return true; + } + return false; + } + + static class DeleteSlice { + // No need to be volatile, slices are thread captive (only accessed by one thread)! + Node sliceHead; // we don't apply this one + Node sliceTail; + + DeleteSlice(Node currentTail) { + assert currentTail != null; + /* + * Initially this is a 0 length slice pointing to the 'current' tail of + * the queue. Once we update the slice we only need to assign the tail and + * have a new slice + */ + sliceHead = sliceTail = currentTail; + } + + void apply(BufferedDeletes del, int docIDUpto) { + if (sliceHead == sliceTail) { + // 0 length slice + return; + } + /* + * When we apply a slice we take the head and get its next as our first + * item to apply and continue until we applied the tail. If the head and + * tail in this slice are not equal then there will be at least one more + * non-null node in the slice! 
+ */ + Node current = sliceHead; + do { + current = current.next; + assert current != null : "slice property violated between the head on the tail must not be a null node"; + current.apply(del, docIDUpto); + } while (current != sliceTail); + reset(); + } + + void reset() { + // Reset to a 0 length slice + sliceHead = sliceTail; + } + + /** + * Returns true iff the given item is identical to the item + * hold by the slices tail, otherwise false. + */ + boolean isTailItem(Object item) { + return sliceTail.item == item; + } + + boolean isEmpty() { + return sliceHead == sliceTail; + } + } + + public int numGlobalTermDeletes() { + return globalBufferedDeletes.numTermDeletes.get(); + } + + void clear() { + globalBufferLock.lock(); + try { + final Node currentTail = tail; + globalSlice.sliceHead = globalSlice.sliceTail = currentTail; + globalBufferedDeletes.clear(); + } finally { + globalBufferLock.unlock(); + } + } + + private static class Node { + volatile Node next; + final Object item; + + private Node(Object item) { + this.item = item; + } + + static final AtomicReferenceFieldUpdater nextUpdater = AtomicReferenceFieldUpdater + .newUpdater(Node.class, Node.class, "next"); + + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + assert false : "sentinel item must never be applied"; + } + + boolean casNext(Node cmp, Node val) { + return nextUpdater.compareAndSet(this, cmp, val); + } + } + + private static final class TermNode extends Node { + + TermNode(Term term) { + super(term); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + bufferedDeletes.addTerm((Term) item, docIDUpto); + } + } + + private static final class QueryArrayNode extends Node { + QueryArrayNode(Query[] query) { + super(query); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + final Query[] queries = (Query[]) item; + for (Query query : queries) { + bufferedDeletes.addQuery(query, docIDUpto); + } + } + } + + private static final class TermArrayNode extends Node { + TermArrayNode(Term[] term) { + super(term); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + final Term[] terms = (Term[]) item; + for (Term term : terms) { + bufferedDeletes.addTerm(term, docIDUpto); + } + } + } + + + private boolean forceApplyGlobalSlice() { + globalBufferLock.lock(); + final Node currentTail = tail; + try { + if (globalSlice.sliceTail != currentTail) { + globalSlice.sliceTail = currentTail; + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + return globalBufferedDeletes.any(); + } finally { + globalBufferLock.unlock(); + } + } + + public int getBufferedDeleteTermsSize() { + globalBufferLock.lock(); + try { + forceApplyGlobalSlice(); + return globalBufferedDeletes.terms.size(); + } finally { + globalBufferLock.unlock(); + } + } + + public long bytesUsed() { + return globalBufferedDeletes.bytesUsed.get(); + } + + @Override + public String toString() { + return "DWDQ: [ generation: " + generation + " ]"; + } + + +} diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java new file mode 100644 index 00000000000..443df5139ca --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java @@ -0,0 +1,394 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.util.ThreadInterruptedException; + +/** + * This class controls {@link DocumentsWriterPerThread} flushing during + * indexing. It tracks the memory consumption per + * {@link DocumentsWriterPerThread} and uses a configured {@link FlushPolicy} to + * decide if a {@link DocumentsWriterPerThread} must flush. + *
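// --- Illustrative sketch (editorial; hypothetical names, not the actual
// DocumentsWriterFlushControl) --- The accounting idea behind the flush control: every
// indexed document moves a writer's memory delta into either the "active" or the
// "flushing" bucket, and a writer whose private buffer crosses the hard limit is marked
// flush pending exactly once, which shifts its bytes from active to flushing.
final class FlushAccounting {

  static final class WriterState {
    long bytesUsed;          // last committed value for this writer
    boolean flushPending;
  }

  private long activeBytes;
  private long flushBytes;
  private final long hardLimitBytes;

  FlushAccounting(long hardLimitBytes) {
    this.hardLimitBytes = hardLimitBytes;
  }

  synchronized void afterDocument(WriterState state, long newBytesUsed) {
    final long delta = newBytesUsed - state.bytesUsed;
    state.bytesUsed = newBytesUsed;
    if (state.flushPending) {
      flushBytes += delta;   // already checked out for flushing: grow the flush bucket
    } else {
      activeBytes += delta;
      if (state.bytesUsed > hardLimitBytes) {
        setFlushPending(state);
      }
    }
  }

  private void setFlushPending(WriterState state) {
    // move this writer's memory from the active to the flushing bucket exactly once
    state.flushPending = true;
    activeBytes -= state.bytesUsed;
    flushBytes += state.bytesUsed;
  }

  synchronized void afterFlush(WriterState state) {
    flushBytes -= state.bytesUsed;   // the flushed bytes are released
    state.bytesUsed = 0;
    state.flushPending = false;
  }

  synchronized long netBytes() { return activeBytes + flushBytes; }
}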

+ * In addition to the {@link FlushPolicy} the flush control might set certain + * {@link DocumentsWriterPerThread} as flush pending iff a + * {@link DocumentsWriterPerThread} exceeds the + * {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()} to prevent address + * space exhaustion. + */ +public final class DocumentsWriterFlushControl { + + private final long hardMaxBytesPerDWPT; + private long activeBytes = 0; + private long flushBytes = 0; + private volatile int numPending = 0; + private volatile int numFlushing = 0; + final AtomicBoolean flushDeletes = new AtomicBoolean(false); + private boolean fullFlush = false; + private Queue flushQueue = new LinkedList(); + // only for safety reasons if a DWPT is close to the RAM limit + private Queue blockedFlushes = new LinkedList(); + + + long peakActiveBytes = 0;// only with assert + long peakFlushBytes = 0;// only with assert + long peakNetBytes = 0;// only with assert + private final Healthiness healthiness; + private final DocumentsWriterPerThreadPool perThreadPool; + private final FlushPolicy flushPolicy; + private boolean closed = false; + private final HashMap flushingWriters = new HashMap(); + private final DocumentsWriter documentsWriter; + + DocumentsWriterFlushControl(DocumentsWriter documentsWriter, + Healthiness healthiness, long hardMaxBytesPerDWPT) { + this.healthiness = healthiness; + this.perThreadPool = documentsWriter.perThreadPool; + this.flushPolicy = documentsWriter.flushPolicy; + this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT; + this.documentsWriter = documentsWriter; + } + + public synchronized long activeBytes() { + return activeBytes; + } + + public synchronized long flushBytes() { + return flushBytes; + } + + public synchronized long netBytes() { + return flushBytes + activeBytes; + } + + private void commitPerThreadBytes(ThreadState perThread) { + final long delta = perThread.perThread.bytesUsed() + - perThread.bytesUsed; + perThread.bytesUsed += delta; + /* + * We need to differentiate here if we are pending since setFlushPending + * moves the perThread memory to the flushBytes and we could be set to + * pending during a delete + */ + if (perThread.flushPending) { + flushBytes += delta; + } else { + activeBytes += delta; + } + assert updatePeaks(delta); + } + + // only for asserts + private boolean updatePeaks(long delta) { + peakActiveBytes = Math.max(peakActiveBytes, activeBytes); + peakFlushBytes = Math.max(peakFlushBytes, flushBytes); + peakNetBytes = Math.max(peakNetBytes, netBytes()); + return true; + } + + synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread, + boolean isUpdate) { + commitPerThreadBytes(perThread); + if (!perThread.flushPending) { + if (isUpdate) { + flushPolicy.onUpdate(this, perThread); + } else { + flushPolicy.onInsert(this, perThread); + } + if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) { + // Safety check to prevent a single DWPT exceeding its RAM limit. 
This + // is super important since we can not address more than 2048 MB per DWPT + setFlushPending(perThread); + if (fullFlush) { + DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false); + assert toBlock != null; + blockedFlushes.add(toBlock); + } + } + } + final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false); + healthiness.updateStalled(this); + return flushingDWPT; + } + + synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) { + assert flushingWriters.containsKey(dwpt); + try { + numFlushing--; + Long bytes = flushingWriters.remove(dwpt); + flushBytes -= bytes.longValue(); + perThreadPool.recycle(dwpt); + healthiness.updateStalled(this); + } finally { + notifyAll(); + } + } + + public synchronized boolean anyFlushing() { + return numFlushing != 0; + } + + public synchronized void waitForFlush() { + if (numFlushing != 0) { + try { + this.wait(); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + } + } + + /** + * Sets flush pending state on the given {@link ThreadState}. The + * {@link ThreadState} must have indexed at least on Document and must not be + * already pending. + */ + public synchronized void setFlushPending(ThreadState perThread) { + assert !perThread.flushPending; + if (perThread.perThread.getNumDocsInRAM() > 0) { + perThread.flushPending = true; // write access synced + final long bytes = perThread.bytesUsed; + flushBytes += bytes; + activeBytes -= bytes; + numPending++; // write access synced + } // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing + + } + + synchronized void doOnAbort(ThreadState state) { + if (state.flushPending) { + flushBytes -= state.bytesUsed; + } else { + activeBytes -= state.bytesUsed; + } + // Take it out of the loop this DWPT is stale + perThreadPool.replaceForFlush(state, closed); + healthiness.updateStalled(this); + } + + synchronized DocumentsWriterPerThread tryCheckoutForFlush( + ThreadState perThread, boolean setPending) { + if (fullFlush) { + return null; + } + return internalTryCheckOutForFlush(perThread, setPending); + } + + private DocumentsWriterPerThread internalTryCheckOutForFlush( + ThreadState perThread, boolean setPending) { + if (setPending && !perThread.flushPending) { + setFlushPending(perThread); + } + if (perThread.flushPending) { + // We are pending so all memory is already moved to flushBytes + if (perThread.tryLock()) { + try { + if (perThread.isActive()) { + assert perThread.isHeldByCurrentThread(); + final DocumentsWriterPerThread dwpt; + final long bytes = perThread.bytesUsed; // do that before + // replace! 
+ dwpt = perThreadPool.replaceForFlush(perThread, closed); + assert !flushingWriters.containsKey(dwpt) : "DWPT is already flushing"; + // Record the flushing DWPT to reduce flushBytes in doAfterFlush + flushingWriters.put(dwpt, Long.valueOf(bytes)); + numPending--; // write access synced + numFlushing++; + return dwpt; + } + } finally { + perThread.unlock(); + } + } + } + return null; + } + + @Override + public String toString() { + return "DocumentsWriterFlushControl [activeBytes=" + activeBytes + + ", flushBytes=" + flushBytes + "]"; + } + + DocumentsWriterPerThread nextPendingFlush() { + synchronized (this) { + DocumentsWriterPerThread poll = flushQueue.poll(); + if (poll != null) { + return poll; + } + } + if (numPending > 0) { + final Iterator allActiveThreads = perThreadPool + .getActivePerThreadsIterator(); + while (allActiveThreads.hasNext() && numPending > 0) { + ThreadState next = allActiveThreads.next(); + if (next.flushPending) { + final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false); + if (dwpt != null) { + return dwpt; + } + } + } + } + return null; + } + + synchronized void setClosed() { + // set by DW to signal that we should not release new DWPT after close + this.closed = true; + } + + /** + * Returns an iterator that provides access to all currently active {@link ThreadState}s + */ + public Iterator allActiveThreads() { + return perThreadPool.getActivePerThreadsIterator(); + } + + synchronized void doOnDelete() { + // pass null this is a global delete no update + flushPolicy.onDelete(this, null); + } + + /** + * Returns the number of delete terms in the global pool + */ + public int getNumGlobalTermDeletes() { + return documentsWriter.deleteQueue.numGlobalTermDeletes(); + } + + int numFlushingDWPT() { + return numFlushing; + } + + public boolean doApplyAllDeletes() { + return flushDeletes.getAndSet(false); + } + + public void setApplyAllDeletes() { + flushDeletes.set(true); + } + + int numActiveDWPT() { + return this.perThreadPool.getMaxThreadStates(); + } + + void markForFullFlush() { + final DocumentsWriterDeleteQueue flushingQueue; + synchronized (this) { + assert !fullFlush; + fullFlush = true; + flushingQueue = documentsWriter.deleteQueue; + // Set a new delete queue - all subsequent DWPT will use this queue until + // we do another full flush + DocumentsWriterDeleteQueue newQueue = new DocumentsWriterDeleteQueue(flushingQueue.generation+1); + documentsWriter.deleteQueue = newQueue; + } + final Iterator allActiveThreads = perThreadPool + .getActivePerThreadsIterator(); + final ArrayList toFlush = new ArrayList(); + while (allActiveThreads.hasNext()) { + final ThreadState next = allActiveThreads.next(); + next.lock(); + try { + if (!next.isActive()) { + continue; + } + assert next.perThread.deleteQueue == flushingQueue + || next.perThread.deleteQueue == documentsWriter.deleteQueue : " flushingQueue: " + + flushingQueue + + " currentqueue: " + + documentsWriter.deleteQueue + + " perThread queue: " + + next.perThread.deleteQueue + + " numDocsInRam: " + next.perThread.getNumDocsInRAM(); + if (next.perThread.deleteQueue != flushingQueue) { + // this one is already a new DWPT + continue; + } + if (next.perThread.getNumDocsInRAM() > 0 ) { + final DocumentsWriterPerThread dwpt = next.perThread; // just for assert + final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true); + assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents"; + assert dwpt == flushingDWPT : "flushControl 
returned different DWPT"; + toFlush.add(flushingDWPT); + } else { + // get the new delete queue from DW + next.perThread.initialize(); + } + } finally { + next.unlock(); + } + } + synchronized (this) { + assert assertBlockedFlushes(flushingQueue); + flushQueue.addAll(blockedFlushes); + blockedFlushes.clear(); + flushQueue.addAll(toFlush); + } + } + + synchronized void finishFullFlush() { + assert fullFlush; + assert flushQueue.isEmpty(); + try { + if (!blockedFlushes.isEmpty()) { + assert assertBlockedFlushes(documentsWriter.deleteQueue); + flushQueue.addAll(blockedFlushes); + blockedFlushes.clear(); + } + } finally { + fullFlush = false; + } + } + + boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) { + Queue flushes = this.blockedFlushes; + for (DocumentsWriterPerThread documentsWriterPerThread : flushes) { + assert documentsWriterPerThread.deleteQueue == flushingQueue; + } + return true; + } + + synchronized void abortFullFlushes() { + try { + for (DocumentsWriterPerThread dwpt : flushQueue) { + doAfterFlush(dwpt); + } + for (DocumentsWriterPerThread dwpt : blockedFlushes) { + doAfterFlush(dwpt); + } + + } finally { + fullFlush = false; + flushQueue.clear(); + blockedFlushes.clear(); + } + } + + synchronized boolean isFullFlush() { + return fullFlush; + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java new file mode 100644 index 00000000000..e943055bc37 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -0,0 +1,501 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + +import java.io.IOException; +import java.io.PrintStream; +import java.text.NumberFormat; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; +import org.apache.lucene.search.SimilarityProvider; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.RamUsageEstimator; + +public class DocumentsWriterPerThread { + + /** + * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method + * which returns the DocConsumer that the DocumentsWriter calls to process the + * documents. 
+ */ + abstract static class IndexingChain { + abstract DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread); + } + + + static final IndexingChain defaultIndexingChain = new IndexingChain() { + + @Override + DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread) { + /* + This is the current indexing chain: + + DocConsumer / DocConsumerPerThread + --> code: DocFieldProcessor / DocFieldProcessorPerThread + --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField + --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField + --> code: DocInverter / DocInverterPerThread / DocInverterPerField + --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: TermsHash / TermsHashPerThread / TermsHashPerField + --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField + --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField + --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField + --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField + --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField + */ + + // Build up indexing chain: + + final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriterPerThread); + final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); + + final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true, + new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null)); + final NormsWriter normsWriter = new NormsWriter(); + final DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter); + return new DocFieldProcessor(documentsWriterPerThread, docInverter); + } + }; + + static class DocState { + final DocumentsWriterPerThread docWriter; + Analyzer analyzer; + PrintStream infoStream; + SimilarityProvider similarityProvider; + int docID; + Document doc; + String maxTermPrefix; + + DocState(DocumentsWriterPerThread docWriter) { + this.docWriter = docWriter; + } + + // Only called by asserts + public boolean testPoint(String name) { + return docWriter.writer.testPoint(name); + } + + public void clear() { + // don't hold onto doc nor analyzer, in case it is + // largish: + doc = null; + analyzer = null; + } + } + + static class FlushedSegment { + final SegmentInfo segmentInfo; + final BufferedDeletes segmentDeletes; + final BitVector deletedDocuments; + + private FlushedSegment(SegmentInfo segmentInfo, + BufferedDeletes segmentDeletes, BitVector deletedDocuments) { + this.segmentInfo = segmentInfo; + this.segmentDeletes = segmentDeletes; + this.deletedDocuments = deletedDocuments; + } + } + + /** Called if we hit an exception at a bad time (when + * updating the index files) and must discard all + * currently buffered docs. This resets our state, + * discarding any docs added since last flush. 
*/ + void abort() throws IOException { + hasAborted = aborting = true; + try { + if (infoStream != null) { + message("docWriter: now abort"); + } + try { + consumer.abort(); + } catch (Throwable t) { + } + + pendingDeletes.clear(); + deleteSlice = deleteQueue.newSlice(); + // Reset all postings data + doAfterFlush(); + + } finally { + aborting = false; + if (infoStream != null) { + message("docWriter: done abort"); + } + } + } + + final DocumentsWriter parent; + final IndexWriter writer; + final Directory directory; + final DocState docState; + final DocConsumer consumer; + final AtomicLong bytesUsed; + + SegmentWriteState flushState; + //Deletes for our still-in-RAM (to be flushed next) segment + BufferedDeletes pendingDeletes; + String segment; // Current segment we are working on + boolean aborting = false; // True if an abort is pending + boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting + + private FieldInfos fieldInfos; + private final PrintStream infoStream; + private int numDocsInRAM; + private int flushedDocCount; + DocumentsWriterDeleteQueue deleteQueue; + DeleteSlice deleteSlice; + private final NumberFormat nf = NumberFormat.getInstance(); + + + public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent, + FieldInfos fieldInfos, IndexingChain indexingChain) { + this.directory = directory; + this.parent = parent; + this.fieldInfos = fieldInfos; + this.writer = parent.indexWriter; + this.infoStream = parent.indexWriter.getInfoStream(); + this.docState = new DocState(this); + this.docState.similarityProvider = parent.indexWriter.getConfig() + .getSimilarityProvider(); + + consumer = indexingChain.getChain(this); + bytesUsed = new AtomicLong(0); + pendingDeletes = new BufferedDeletes(false); + initialize(); + } + + public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos fieldInfos) { + this(other.directory, other.parent, fieldInfos, other.parent.chain); + } + + void initialize() { + deleteQueue = parent.deleteQueue; + assert numDocsInRAM == 0 : "num docs " + numDocsInRAM; + pendingDeletes.clear(); + deleteSlice = null; + } + + void setAborting() { + aborting = true; + } + + boolean checkAndResetHasAborted() { + final boolean retval = hasAborted; + hasAborted = false; + return retval; + } + + public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException { + assert writer.testPoint("DocumentsWriterPerThread addDocument start"); + assert deleteQueue != null; + docState.doc = doc; + docState.analyzer = analyzer; + docState.docID = numDocsInRAM; + if (segment == null) { + // this call is synchronized on IndexWriter.segmentInfos + segment = writer.newSegmentName(); + assert numDocsInRAM == 0; + } + + boolean success = false; + try { + try { + consumer.processDocument(fieldInfos); + } finally { + docState.clear(); + } + success = true; + } finally { + if (!success) { + if (!aborting) { + // mark document as deleted + deleteDocID(docState.docID); + numDocsInRAM++; + } else { + abort(); + } + } + } + success = false; + try { + consumer.finishDocument(); + success = true; + } finally { + if (!success) { + abort(); + } + } + finishDocument(delTerm); + } + + private void finishDocument(Term delTerm) throws IOException { + /* + * here we actually finish the document in two steps 1. push the delete into + * the queue and update our slice. 2. increment the DWPT private document + * id. + * + * the updated slice we get from 1. 
holds all the deletes that have occurred + * since we updated the slice the last time. + */ + if (deleteSlice == null) { + deleteSlice = deleteQueue.newSlice(); + if (delTerm != null) { + deleteQueue.add(delTerm, deleteSlice); + deleteSlice.reset(); + } + + } else { + if (delTerm != null) { + deleteQueue.add(delTerm, deleteSlice); + assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item"; + deleteSlice.apply(pendingDeletes, numDocsInRAM); + } else if (deleteQueue.updateSlice(deleteSlice)) { + deleteSlice.apply(pendingDeletes, numDocsInRAM); + } + } + ++numDocsInRAM; + } + + // Buffer a specific docID for deletion. Currently only + // used when we hit a exception when adding a document + void deleteDocID(int docIDUpto) { + pendingDeletes.addDocID(docIDUpto); + // NOTE: we do not trigger flush here. This is + // potentially a RAM leak, if you have an app that tries + // to add docs but every single doc always hits a + // non-aborting exception. Allowing a flush here gets + // very messy because we are only invoked when handling + // exceptions so to do this properly, while handling an + // exception we'd have to go off and flush new deletes + // which is risky (likely would hit some other + // confounding exception). + } + + /** + * Returns the number of delete terms in this {@link DocumentsWriterPerThread} + */ + public int numDeleteTerms() { + // public for FlushPolicy + return pendingDeletes.numTermDeletes.get(); + } + + /** + * Returns the number of RAM resident documents in this {@link DocumentsWriterPerThread} + */ + public int getNumDocsInRAM() { + // public for FlushPolicy + return numDocsInRAM; + } + + SegmentCodecs getCodec() { + return flushState.segmentCodecs; + } + + /** Reset after a flush */ + private void doAfterFlush() throws IOException { + segment = null; + consumer.doAfterFlush(); + fieldInfos = new FieldInfos(fieldInfos); + parent.subtractFlushedNumDocs(numDocsInRAM); + numDocsInRAM = 0; + } + + /** + * Prepares this DWPT for flushing. This method will freeze and return the + * {@link DocumentsWriterDeleteQueue}s global buffer and apply all pending + * deletes to this DWPT. + */ + FrozenBufferedDeletes prepareFlush() { + assert numDocsInRAM > 0; + final FrozenBufferedDeletes globalDeletes = deleteQueue.freezeGlobalBuffer(deleteSlice); + /* deleteSlice can possibly be null if we have hit non-aborting exceptions during indexing and never succeeded + adding a document. */ + if (deleteSlice != null) { + // apply all deletes before we flush and release the delete slice + deleteSlice.apply(pendingDeletes, numDocsInRAM); + assert deleteSlice.isEmpty(); + deleteSlice = null; + } + return globalDeletes; + } + + /** Flush all pending docs to a new segment */ + FlushedSegment flush() throws IOException { + assert numDocsInRAM > 0; + assert deleteSlice == null : "all deletes must be applied in prepareFlush"; + flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos, + numDocsInRAM, writer.getConfig().getTermIndexInterval(), + fieldInfos.buildSegmentCodecs(true), pendingDeletes); + final double startMBUsed = parent.flushControl.netBytes() / 1024. 
/ 1024.; + // Apply delete-by-docID now (delete-byDocID only + // happens when an exception is hit processing that + // doc, eg if analyzer has some problem w/ the text): + if (pendingDeletes.docIDs.size() > 0) { + flushState.deletedDocs = new BitVector(numDocsInRAM); + for(int delDocID : pendingDeletes.docIDs) { + flushState.deletedDocs.set(delDocID); + } + pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); + pendingDeletes.docIDs.clear(); + } + + if (infoStream != null) { + message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM); + } + + if (aborting) { + if (infoStream != null) { + message("flush: skip because aborting is set"); + } + return null; + } + + boolean success = false; + + try { + + SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); + consumer.flush(flushState); + pendingDeletes.terms.clear(); + newSegment.setHasVectors(flushState.hasVectors); + + if (infoStream != null) { + message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs"); + message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors")); + message("flushedFiles=" + newSegment.files()); + message("flushed codecs=" + newSegment.getSegmentCodecs()); + } + flushedDocCount += flushState.numDocs; + + final BufferedDeletes segmentDeletes; + if (pendingDeletes.queries.isEmpty()) { + pendingDeletes.clear(); + segmentDeletes = null; + } else { + segmentDeletes = pendingDeletes; + pendingDeletes = new BufferedDeletes(false); + } + + if (infoStream != null) { + final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; + final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; + message("flushed: segment=" + newSegment + + " ramUsed=" + nf.format(startMBUsed) + " MB" + + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + + " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + + " docs/MB=" + nf.format(flushedDocCount / newSegmentSize) + + " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); + } + doAfterFlush(); + success = true; + + return new FlushedSegment(newSegment, segmentDeletes, flushState.deletedDocs); + } finally { + if (!success) { + if (segment != null) { + synchronized(parent.indexWriter) { + parent.indexWriter.deleter.refresh(segment); + } + } + abort(); + } + } + } + + /** Get current segment name we are writing. 
*/ + String getSegment() { + return segment; + } + + long bytesUsed() { + return bytesUsed.get() + pendingDeletes.bytesUsed.get(); + } + + FieldInfos getFieldInfos() { + return fieldInfos; + } + + void message(String message) { + writer.message("DWPT: " + message); + } + + /* Initial chunks size of the shared byte[] blocks used to + store postings data */ + final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; + + /* if you increase this, you must fix field cache impl for + * getTerms/getTermsIndex requires <= 32768 */ + final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; + + /* Initial chunks size of the shared int[] blocks used to + store postings data */ + final static int INT_BLOCK_SHIFT = 13; + final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; + final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; + + /* Allocate another int[] from the shared pool */ + int[] getIntBlock() { + int[] b = new int[INT_BLOCK_SIZE]; + bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); + return b; + } + + void recycleIntBlocks(int[][] blocks, int offset, int length) { + bytesUsed.addAndGet(-(length *(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT))); + } + + final Allocator byteBlockAllocator = new DirectTrackingAllocator(); + + + private class DirectTrackingAllocator extends Allocator { + public DirectTrackingAllocator() { + this(BYTE_BLOCK_SIZE); + } + + public DirectTrackingAllocator(int blockSize) { + super(blockSize); + } + + public byte[] getByteBlock() { + bytesUsed.addAndGet(blockSize); + return new byte[blockSize]; + } + @Override + public void recycleByteBlocks(byte[][] blocks, int start, int end) { + bytesUsed.addAndGet(-((end-start)* blockSize)); + for (int i = start; i < end; i++) { + blocks[i] = null; + } + } + + }; + + PerDocWriteState newPerDocWriteState(int codecId) { + assert segment != null; + return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java new file mode 100644 index 00000000000..0a03ea39248 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java @@ -0,0 +1,268 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Iterator; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; +import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.SetOnce; + +/** + * {@link DocumentsWriterPerThreadPool} controls {@link ThreadState} instances + * and their thread assignments during indexing. Each {@link ThreadState} holds + * a reference to a {@link DocumentsWriterPerThread} that is once a + * {@link ThreadState} is obtained from the pool exclusively used for indexing a + * single document by the obtaining thread. Each indexing thread must obtain + * such a {@link ThreadState} to make progress. Depending on the + * {@link DocumentsWriterPerThreadPool} implementation {@link ThreadState} + * assignments might differ from document to document. + *

+ * Once a {@link DocumentsWriterPerThread} is selected for flush the thread pool + * is reusing the flushing {@link DocumentsWriterPerThread}s ThreadState with a + * new {@link DocumentsWriterPerThread} instance. + *
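+ * <p>
+ * A minimal sketch of how an indexing thread is expected to use the pool
+ * (method names are taken from this class and from
+ * {@link DocumentsWriterPerThread}; the local variables and the omitted
+ * error/flush handling are placeholders, not the actual DocumentsWriter code):
+ * <pre>
+ *   ThreadState state = pool.getAndLock(Thread.currentThread(), documentsWriter, doc);
+ *   try {
+ *     DocumentsWriterPerThread dwpt = state.getDocumentsWriterPerThread();
+ *     dwpt.updateDocument(doc, analyzer, delTerm);
+ *   } finally {
+ *     state.unlock();
+ *   }
+ * </pre>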

+ */ +public abstract class DocumentsWriterPerThreadPool { + /** The maximum number of simultaneous threads that may be + * indexing documents at once in IndexWriter; if more + * than this many threads arrive they will wait for + * others to finish. */ + public final static int DEFAULT_MAX_THREAD_STATES = 8; + + /** + * {@link ThreadState} references and guards a + * {@link DocumentsWriterPerThread} instance that is used during indexing to + * build a in-memory index segment. {@link ThreadState} also holds all flush + * related per-thread data controlled by {@link DocumentsWriterFlushControl}. + *

+ * A {@link ThreadState}, its methods and members should only accessed by one + * thread a time. Users must acquire the lock via {@link ThreadState#lock()} + * and release the lock in a finally block via {@link ThreadState#unlock()} + * before accessing the state. + */ + @SuppressWarnings("serial") + public final static class ThreadState extends ReentrantLock { + // package private for FlushPolicy + DocumentsWriterPerThread perThread; + // write access guarded by DocumentsWriterFlushControl + volatile boolean flushPending = false; + // write access guarded by DocumentsWriterFlushControl + long bytesUsed = 0; + // guarded by Reentrant lock + private boolean isActive = true; + + ThreadState(DocumentsWriterPerThread perThread) { + this.perThread = perThread; + } + + /** + * Resets the internal {@link DocumentsWriterPerThread} with the given one. + * if the given DWPT is null this ThreadState is marked as inactive and should not be used + * for indexing anymore. + * @see #isActive() + */ + void resetWriter(DocumentsWriterPerThread perThread) { + assert this.isHeldByCurrentThread(); + if (perThread == null) { + isActive = false; + } + this.perThread = perThread; + this.bytesUsed = 0; + this.flushPending = false; + } + + /** + * Returns true if this ThreadState is still open. This will + * only return false iff the DW has been closed and this + * ThreadState is already checked out for flush. + */ + boolean isActive() { + assert this.isHeldByCurrentThread(); + return isActive; + } + + /** + * Returns the number of currently active bytes in this ThreadState's + * {@link DocumentsWriterPerThread} + */ + public long getBytesUsedPerThread() { + assert this.isHeldByCurrentThread(); + // public for FlushPolicy + return bytesUsed; + } + + /** + * Returns this {@link ThreadState}s {@link DocumentsWriterPerThread} + */ + public DocumentsWriterPerThread getDocumentsWriterPerThread() { + assert this.isHeldByCurrentThread(); + // public for FlushPolicy + return perThread; + } + + /** + * Returns true iff this {@link ThreadState} is marked as flush + * pending otherwise false + */ + public boolean isFlushPending() { + return flushPending; + } + } + + private final ThreadState[] perThreads; + private volatile int numThreadStatesActive; + private CodecProvider codecProvider; + private FieldNumberBiMap globalFieldMap; + private final SetOnce documentsWriter = new SetOnce(); + + /** + * Creates a new {@link DocumentsWriterPerThreadPool} with max. + * {@link #DEFAULT_MAX_THREAD_STATES} thread states. + */ + public DocumentsWriterPerThreadPool() { + this(DEFAULT_MAX_THREAD_STATES); + } + + public DocumentsWriterPerThreadPool(int maxNumPerThreads) { + maxNumPerThreads = (maxNumPerThreads < 1) ? 
DEFAULT_MAX_THREAD_STATES : maxNumPerThreads; + perThreads = new ThreadState[maxNumPerThreads]; + numThreadStatesActive = 0; + } + + public void initialize(DocumentsWriter documentsWriter, FieldNumberBiMap globalFieldMap, IndexWriterConfig config) { + this.documentsWriter.set(documentsWriter); // thread pool is bound to DW + final CodecProvider codecs = config.getCodecProvider(); + this.codecProvider = codecs; + this.globalFieldMap = globalFieldMap; + for (int i = 0; i < perThreads.length; i++) { + final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)); + perThreads[i] = new ThreadState(new DocumentsWriterPerThread(documentsWriter.directory, documentsWriter, infos, documentsWriter.chain)); + } + } + + /** + * Returns the max number of {@link ThreadState} instances available in this + * {@link DocumentsWriterPerThreadPool} + */ + public int getMaxThreadStates() { + return perThreads.length; + } + + /** + * Returns a new {@link ThreadState} iff any new state is available otherwise + * null. + *

+ * NOTE: the returned {@link ThreadState} is already locked iff non- + * null. + * + * @return a new {@link ThreadState} iff any new state is available otherwise + * null + */ + public synchronized ThreadState newThreadState() { + if (numThreadStatesActive < perThreads.length) { + final ThreadState threadState = perThreads[numThreadStatesActive]; + threadState.lock(); // lock so nobody else will get this ThreadState + numThreadStatesActive++; // increment will publish the ThreadState + threadState.perThread.initialize(); + return threadState; + } + return null; + } + + protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) { + assert threadState.isHeldByCurrentThread(); + final DocumentsWriterPerThread dwpt = threadState.perThread; + if (!closed) { + final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecProvider)); + final DocumentsWriterPerThread newDwpt = new DocumentsWriterPerThread(dwpt, infos); + newDwpt.initialize(); + threadState.resetWriter(newDwpt); + } else { + threadState.resetWriter(null); + } + return dwpt; + } + + public void recycle(DocumentsWriterPerThread dwpt) { + // don't recycle DWPT by default + } + + public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc); + + /** + * Returns an iterator providing access to all {@link ThreadState} + * instances. + */ + // TODO: new Iterator per indexed doc is overkill...? + public Iterator getAllPerThreadsIterator() { + return getPerThreadsIterator(this.perThreads.length); + } + + /** + * Returns an iterator providing access to all active {@link ThreadState} + * instances. + *

+ * Note: The returned iterator will only iterator + * {@link ThreadState}s that are active at the point in time when this method + * has been called. + * + */ + // TODO: new Iterator per indexed doc is overkill...? + public Iterator getActivePerThreadsIterator() { + return getPerThreadsIterator(numThreadStatesActive); + } + + private Iterator getPerThreadsIterator(final int upto) { + return new Iterator() { + int i = 0; + + public boolean hasNext() { + return i < upto; + } + + public ThreadState next() { + return perThreads[i++]; + } + + public void remove() { + throw new UnsupportedOperationException("remove() not supported."); + } + }; + } + + /** + * Returns the ThreadState with the minimum estimated number of threads + * waiting to acquire its lock or null if no {@link ThreadState} + * is yet visible to the calling thread. + */ + protected ThreadState minContendedThreadState() { + ThreadState minThreadState = null; + // TODO: new Iterator per indexed doc is overkill...? + final Iterator it = getActivePerThreadsIterator(); + while (it.hasNext()) { + final ThreadState state = it.next(); + if (minThreadState == null || state.getQueueLength() < minThreadState.getQueueLength()) { + minThreadState = state; + } + } + return minThreadState; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java deleted file mode 100644 index 611098a64bc..00000000000 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -/** Used by DocumentsWriter to maintain per-thread state. - * We keep a separate Posting hash and other state for each - * thread and then merge postings hashes from all threads - * when writing the segment. 
*/ -final class DocumentsWriterThreadState { - - boolean isIdle = true; // false if this is currently in use by a thread - int numThreads = 1; // Number of threads that share this instance - final DocConsumerPerThread consumer; - final DocumentsWriter.DocState docState; - - final DocumentsWriter docWriter; - - public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException { - this.docWriter = docWriter; - docState = new DocumentsWriter.DocState(); - docState.infoStream = docWriter.infoStream; - docState.similarityProvider = docWriter.similarityProvider; - docState.docWriter = docWriter; - consumer = docWriter.consumer.addThread(this); - } - - void doAfterFlush() { - numThreads = 0; - } -} diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 144e0e1e3cb..3aba2850b42 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -22,9 +22,11 @@ import org.apache.lucene.index.values.Type; /** @lucene.experimental */ public final class FieldInfo { public static final int UNASSIGNED_CODEC_ID = -1; - public String name; + + public final String name; + public final int number; + public boolean isIndexed; - public int number; Type docValues; @@ -61,6 +63,7 @@ public final class FieldInfo { this.omitNorms = false; this.omitTermFreqAndPositions = false; } + assert !omitTermFreqAndPositions || !storePayloads; } void setCodecId(int codecId) { @@ -83,6 +86,7 @@ public final class FieldInfo { // should only be called by FieldInfos#addOrUpdate void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) { + if (this.isIndexed != isIndexed) { this.isIndexed = true; // once indexed, always index } @@ -104,8 +108,10 @@ public final class FieldInfo { } if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) { this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life + this.storePayloads = false; } } + assert !this.omitTermFreqAndPositions || !this.storePayloads; } void setDocValues(Type v) { diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 33124c772b5..c62649a6bf1 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -28,6 +28,7 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.Map.Entry; +import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.values.Type; @@ -187,7 +188,7 @@ public final class FieldInfos implements Iterable { } // used by assert - boolean containsConsistent(Integer number, String name) { + synchronized boolean containsConsistent(Integer number, String name) { return name.equals(numberToName.get(number)) && number.equals(nameToNumber.get(name)); } @@ -222,12 +223,13 @@ public final class FieldInfos implements Iterable { /** * Creates a new {@link FieldInfos} instance with a private - * {@link FieldNumberBiMap} and a default {@link SegmentCodecsBuilder} + * {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap} and a default {@link SegmentCodecsBuilder} * 
initialized with {@link CodecProvider#getDefault()}. *

* Note: this ctor should not be used during indexing use * {@link FieldInfos#FieldInfos(FieldInfos)} or - * {@link FieldInfos#FieldInfos(FieldNumberBiMap)} instead. + * {@link FieldInfos#FieldInfos(FieldNumberBiMap,org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder)} + * instead. */ public FieldInfos() { this(new FieldNumberBiMap(), SegmentCodecsBuilder.create(CodecProvider.getDefault())); @@ -556,9 +558,10 @@ public final class FieldInfos implements Iterable { /** * Returns true iff this instance is not backed by a - * {@link FieldNumberBiMap}. Instances read from a directory via + * {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap}. Instances read from a directory via * {@link FieldInfos#FieldInfos(Directory, String)} will always be read-only - * since no {@link FieldNumberBiMap} is supplied, otherwise false. + * since no {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap} is supplied, otherwise + * false. */ public final boolean isReadOnly() { return globalFieldNumbers == null; @@ -568,6 +571,7 @@ public final class FieldInfos implements Iterable { output.writeVInt(FORMAT_CURRENT); output.writeVInt(size()); for (FieldInfo fi : this) { + assert !fi.omitTermFreqAndPositions || !fi.storePayloads; byte bits = 0x0; if (fi.isIndexed) bits |= IS_INDEXED; if (fi.storeTermVector) bits |= STORE_TERMVECTOR; @@ -647,6 +651,14 @@ public final class FieldInfos implements Iterable { boolean omitNorms = (bits & OMIT_NORMS) != 0; boolean storePayloads = (bits & STORE_PAYLOADS) != 0; boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; + + // LUCENE-3027: past indices were able to write + // storePayloads=true when omitTFAP is also true, + // which is invalid. We correct that, here: + if (omitTermFreqAndPositions) { + storePayloads = false; + } + Type docValuesType = null; if (format <= FORMAT_INDEX_VALUES) { final byte b = input.readByte(); diff --git a/lucene/src/java/org/apache/lucene/index/Fields.java b/lucene/src/java/org/apache/lucene/index/Fields.java index 20e7176f4ec..01b7f0d50ca 100644 --- a/lucene/src/java/org/apache/lucene/index/Fields.java +++ b/lucene/src/java/org/apache/lucene/index/Fields.java @@ -19,8 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.index.values.DocValues; - /** Flex API for access to fields and terms * @lucene.experimental */ @@ -34,15 +32,5 @@ public abstract class Fields { * null if the field does not exist. */ public abstract Terms terms(String field) throws IOException; - /** - * Returns {@link DocValues} for the current field. - * - * @param field the field name - * @return the {@link DocValues} for this field or null if not - * applicable. - * @throws IOException - */ - public abstract DocValues docValues(String field) throws IOException; - public final static Fields[] EMPTY_ARRAY = new Fields[0]; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index 290cd107cfb..51ffa5f04b9 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -58,16 +58,6 @@ public abstract class FieldsEnum { * will not return null. */ public abstract TermsEnum terms() throws IOException; - /** - * Returns {@link DocValues} for the current field. - * - * @return the {@link DocValues} for this field or null if not - * applicable. 
- * @throws IOException - */ - public abstract DocValues docValues() throws IOException; - - public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; /** Provides zero fields */ @@ -82,10 +72,5 @@ public abstract class FieldsEnum { public TermsEnum terms() { throw new IllegalStateException("this method should never be called"); } - - @Override - public DocValues docValues() throws IOException { - throw new IllegalStateException("this method should never be called"); - } }; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java index f694bb4342c..303aa912bc3 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java @@ -2,13 +2,13 @@ package org.apache.lucene.index; /** * Copyright 2004 The Apache Software Foundation - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -22,15 +22,14 @@ import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMOutputStream; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; final class FieldsWriter { static final byte FIELD_IS_TOKENIZED = 0x1; static final byte FIELD_IS_BINARY = 0x2; - + // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; @@ -38,7 +37,7 @@ final class FieldsWriter { // than the current one, and always change this if you // switch to a new format! static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; - + // when removing support for old versions, leave the last supported version here static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; @@ -83,10 +82,9 @@ final class FieldsWriter { // and adds a new entry for this document into the index // stream. This assumes the buffer was already written // in the correct fields format. 
- void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException { + void startDocument(int numStoredFields) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); fieldsStream.writeVInt(numStoredFields); - buffer.writeTo(fieldsStream); } void skipDocument() throws IOException { @@ -121,8 +119,8 @@ final class FieldsWriter { } } - final void writeField(FieldInfo fi, Fieldable field) throws IOException { - fieldsStream.writeVInt(fi.number); + final void writeField(int fieldNumber, Fieldable field) throws IOException { + fieldsStream.writeVInt(fieldNumber); byte bits = 0; if (field.isTokenized()) bits |= FieldsWriter.FIELD_IS_TOKENIZED; @@ -175,10 +173,9 @@ final class FieldsWriter { fieldsStream.writeVInt(storedCount); - for (Fieldable field : fields) { if (field.isStored()) - writeField(fieldInfos.fieldInfo(field.name()), field); + writeField(fieldInfos.fieldNumber(field.name()), field); } } } diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 4dc7cfee89e..8d17f534db4 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -19,9 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.DocValuesEnum; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -62,11 +60,6 @@ public class FilterIndexReader extends IndexReader { public Terms terms(String field) throws IOException { return in.terms(field); } - - @Override - public DocValues docValues(String field) throws IOException { - return in.docValues(field); - } } /** Base class for filtering {@link Terms} @@ -130,11 +123,6 @@ public class FilterIndexReader extends IndexReader { public TermsEnum terms() throws IOException { return in.terms(); } - - @Override - public DocValues docValues() throws IOException { - return in.docValues(); - } } /** Base class for filtering {@link TermsEnum} implementations. */ @@ -475,4 +463,9 @@ public class FilterIndexReader extends IndexReader { super.removeReaderFinishedListener(listener); in.removeReaderFinishedListener(listener); } + + @Override + public PerDocValues perDocValues() throws IOException { + return in.perDocValues(); + } } diff --git a/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java b/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java new file mode 100644 index 00000000000..81e3676246b --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java @@ -0,0 +1,128 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; + +/** + * Default {@link FlushPolicy} implementation that flushes based on RAM used, + * document count and number of buffered deletes depending on the IndexWriter's + * {@link IndexWriterConfig}. + * + *

+ * <ul>
+ * <li>{@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
+ * based on the global number of buffered delete terms iff
+ * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is enabled</li>
+ * <li>{@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
+ * either on the number of documents per {@link DocumentsWriterPerThread} (
+ * {@link DocumentsWriterPerThread#getNumDocsInRAM()}) or on the global active
+ * memory consumption in the current indexing session iff
+ * {@link IndexWriterConfig#getMaxBufferedDocs()} or
+ * {@link IndexWriterConfig#getRAMBufferSizeMB()} is enabled respectively</li>
+ * <li>{@link #onUpdate(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - calls
+ * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} and
+ * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} in order</li>
+ * </ul>
+ * All {@link IndexWriterConfig} settings are used to mark
+ * {@link DocumentsWriterPerThread} as flush pending during indexing with
+ * respect to their live updates.
+ * <p>
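+ * For example (a hypothetical value, used only for illustration): with
+ * {@code setMaxBufferedDocs(1000)} a DWPT is marked flush pending as soon as
+ * it buffers 1000 documents, independently of the other active DWPTs.
+ * <p>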

+ * If {@link IndexWriterConfig#setRAMBufferSizeMB(double)} is enabled, the + * largest ram consuming {@link DocumentsWriterPerThread} will be marked as + * pending iff the global active RAM consumption is >= the configured max RAM + * buffer. + */ +public class FlushByRamOrCountsPolicy extends FlushPolicy { + + @Override + public void onDelete(DocumentsWriterFlushControl control, ThreadState state) { + if (flushOnDeleteTerms()) { + // Flush this state by num del terms + final int maxBufferedDeleteTerms = indexWriterConfig + .getMaxBufferedDeleteTerms(); + if (control.getNumGlobalTermDeletes() >= maxBufferedDeleteTerms) { + control.setApplyAllDeletes(); + } + } + final DocumentsWriter writer = this.writer.get(); + // If deletes alone are consuming > 1/2 our RAM + // buffer, force them all to apply now. This is to + // prevent too-frequent flushing of a long tail of + // tiny segments: + if ((flushOnRAM() && + writer.deleteQueue.bytesUsed() > (1024*1024*indexWriterConfig.getRAMBufferSizeMB()/2))) { + control.setApplyAllDeletes(); + if (writer.infoStream != null) { + writer.message("force apply deletes bytesUsed=" + writer.deleteQueue.bytesUsed() + " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB())); + } + } + } + + @Override + public void onInsert(DocumentsWriterFlushControl control, ThreadState state) { + if (flushOnDocCount() + && state.perThread.getNumDocsInRAM() >= indexWriterConfig + .getMaxBufferedDocs()) { + // Flush this state by num docs + control.setFlushPending(state); + } else if (flushOnRAM()) {// flush by RAM + final long limit = (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024.d * 1024.d); + final long totalRam = control.activeBytes(); + if (totalRam >= limit) { + markLargestWriterPending(control, state, totalRam); + } + } + } + + /** + * Marks the most ram consuming active {@link DocumentsWriterPerThread} flush + * pending + */ + protected void markLargestWriterPending(DocumentsWriterFlushControl control, + ThreadState perThreadState, final long currentBytesPerThread) { + control + .setFlushPending(findLargestNonPendingWriter(control, perThreadState)); + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getMaxBufferedDocs()}, otherwise + * false. + */ + protected boolean flushOnDocCount() { + return indexWriterConfig.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()}, otherwise + * false. + */ + protected boolean flushOnDeleteTerms() { + return indexWriterConfig.getMaxBufferedDeleteTerms() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getRAMBufferSizeMB()}, otherwise + * false. + */ + protected boolean flushOnRAM() { + return indexWriterConfig.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/FlushPolicy.java b/lucene/src/java/org/apache/lucene/index/FlushPolicy.java new file mode 100644 index 00000000000..13f8a45e847 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/FlushPolicy.java @@ -0,0 +1,131 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.Iterator; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.SetOnce; + +/** + * {@link FlushPolicy} controls when segments are flushed from a RAM resident + * internal data-structure to the {@link IndexWriter}s {@link Directory}. + *

+ * Segments are traditionally flushed by:
+ * <ul>
+ * <li>RAM consumption - configured via
+ * {@link IndexWriterConfig#setRAMBufferSizeMB(double)}</li>
+ * <li>Number of RAM resident documents - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDocs(int)}</li>
+ * <li>Number of buffered delete terms/queries - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}</li>
+ * </ul>
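+ * <p>
+ * A minimal configuration sketch (illustrative values, not part of this
+ * patch; only the setters named above and {@code DISABLE_AUTO_FLUSH} are
+ * taken from the actual API):
+ * <pre>
+ *   IndexWriterConfig conf = ...; // created with the application's analyzer
+ *   conf.setRAMBufferSizeMB(32.0);                                 // flush by RAM
+ *   conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // not by doc count
+ *   conf.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+ * </pre>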
+ * + * The {@link IndexWriter} consults a provided {@link FlushPolicy} to control the + * flushing process. The policy is informed for each added or + * updated document as well as for each delete term. Based on the + * {@link FlushPolicy}, the information provided via {@link ThreadState} and + * {@link DocumentsWriterFlushControl}, the {@link FlushPolicy} decides if a + * {@link DocumentsWriterPerThread} needs flushing and mark it as + * flush-pending via + * {@link DocumentsWriterFlushControl#setFlushPending(DocumentsWriterPerThreadPool.ThreadState)}. + * + * @see ThreadState + * @see DocumentsWriterFlushControl + * @see DocumentsWriterPerThread + * @see IndexWriterConfig#setFlushPolicy(FlushPolicy) + */ +public abstract class FlushPolicy { + protected final SetOnce writer = new SetOnce(); + protected IndexWriterConfig indexWriterConfig; + + /** + * Called for each delete term. If this is a delete triggered due to an update + * the given {@link ThreadState} is non-null. + *

+ * Note: This method is called synchronized on the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public abstract void onDelete(DocumentsWriterFlushControl control, + ThreadState state); + + /** + * Called for each document update on the given {@link ThreadState}'s + * {@link DocumentsWriterPerThread}. + *

+ * Note: This method is called synchronized on the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public void onUpdate(DocumentsWriterFlushControl control, ThreadState state) { + onInsert(control, state); + if (!state.flushPending) { + onDelete(control, state); + } + } + + /** + * Called for each document addition on the given {@link ThreadState}s + * {@link DocumentsWriterPerThread}. + *

+ * Note: This method is synchronized by the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public abstract void onInsert(DocumentsWriterFlushControl control, + ThreadState state); + + /** + * Called by DocumentsWriter to initialize the FlushPolicy + */ + protected synchronized void init(DocumentsWriter docsWriter) { + writer.set(docsWriter); + indexWriterConfig = docsWriter.indexWriter.getConfig(); + } + + /** + * Returns the current most RAM consuming non-pending {@link ThreadState} with + * at least one indexed document. + *

+ * This method will never return null + */ + protected ThreadState findLargestNonPendingWriter( + DocumentsWriterFlushControl control, ThreadState perThreadState) { + assert perThreadState.perThread.getNumDocsInRAM() > 0; + long maxRamSoFar = perThreadState.bytesUsed; + // the dwpt which needs to be flushed eventually + ThreadState maxRamUsingThreadState = perThreadState; + assert !perThreadState.flushPending : "DWPT should have flushed"; + Iterator activePerThreadsIterator = control.allActiveThreads(); + while (activePerThreadsIterator.hasNext()) { + ThreadState next = activePerThreadsIterator.next(); + if (!next.flushPending) { + final long nextRam = next.bytesUsed; + if (nextRam > maxRamSoFar && next.perThread.getNumDocsInRAM() > 0) { + maxRamSoFar = nextRam; + maxRamUsingThreadState = next; + } + } + } + assert writer.get().message( + "set largest ram consuming thread pending on lower watermark"); + return maxRamUsingThreadState; + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java b/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java deleted file mode 100644 index de2a8cce677..00000000000 --- a/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java +++ /dev/null @@ -1,115 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; - -import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; - -// TODO FI: some of this is "generic" to TermsHash* so we -// should factor it out so other consumers don't have to -// duplicate this code - -/** Used by DocumentsWriter to merge the postings from - * multiple ThreadStates when creating a segment */ -final class FreqProxFieldMergeState { - - final FreqProxTermsWriterPerField field; - final int numPostings; - private final ByteBlockPool bytePool; - final int[] termIDs; - final FreqProxPostingsArray postings; - int currentTermID; - - final BytesRef text = new BytesRef(); - - private int postingUpto = -1; - - final ByteSliceReader freq = new ByteSliceReader(); - final ByteSliceReader prox = new ByteSliceReader(); - - int docID; - int termFreq; - - public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator termComp) { - this.field = field; - this.numPostings = field.termsHashPerField.bytesHash.size(); - this.bytePool = field.perThread.termsHashPerThread.bytePool; - this.termIDs = field.termsHashPerField.sortPostings(termComp); - this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray; - } - - boolean nextTerm() throws IOException { - postingUpto++; - if (postingUpto == numPostings) { - return false; - } - - currentTermID = termIDs[postingUpto]; - docID = 0; - - // Get BytesRef - final int textStart = postings.textStarts[currentTermID]; - bytePool.setBytesRef(text, textStart); - - field.termsHashPerField.initReader(freq, currentTermID, 0); - if (!field.fieldInfo.omitTermFreqAndPositions) { - field.termsHashPerField.initReader(prox, currentTermID, 1); - } - - // Should always be true - boolean result = nextDoc(); - assert result; - - return true; - } - - public boolean nextDoc() throws IOException { - if (freq.eof()) { - if (postings.lastDocCodes[currentTermID] != -1) { - // Return last doc - docID = postings.lastDocIDs[currentTermID]; - if (!field.omitTermFreqAndPositions) - termFreq = postings.docFreqs[currentTermID]; - postings.lastDocCodes[currentTermID] = -1; - return true; - } else - // EOF - return false; - } - - final int code = freq.readVInt(); - if (field.omitTermFreqAndPositions) - docID += code; - else { - docID += code >>> 1; - if ((code & 1) != 0) - termFreq = 1; - else - termFreq = freq.readVInt(); - } - - assert docID != postings.lastDocIDs[currentTermID]; - - return true; - } -} diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java index d342cb47249..0622fc672f8 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java @@ -19,55 +19,35 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.PostingsConsumer; -import org.apache.lucene.index.codecs.TermStats; -import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; final class FreqProxTermsWriter extends TermsHashConsumer { - @Override - public TermsHashConsumerPerThread 
addThread(TermsHashPerThread perThread) { - return new FreqProxTermsWriterPerThread(perThread); - } - @Override void abort() {} - private int flushedDocCount; - // TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... @Override - public void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + public void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { // Gather all FieldData's that have postings, across all // ThreadStates List allFields = new ArrayList(); - - flushedDocCount = state.numDocs; - for (Map.Entry> entry : threadsAndFields.entrySet()) { - - Collection fields = entry.getValue(); - - - for (final TermsHashConsumerPerField i : fields) { - final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i; - if (perField.termsHashPerField.bytesHash.size() > 0) + for (TermsHashConsumerPerField f : fieldsToFlush.values()) { + final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f; + if (perField.termsHashPerField.bytesHash.size() > 0) { allFields.add(perField); - } + } } final int numAllFields = allFields.size(); @@ -77,6 +57,8 @@ final class FreqProxTermsWriter extends TermsHashConsumer { final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); + TermsHash termsHash = null; + /* Current writer chain: FieldsConsumer @@ -89,255 +71,48 @@ final class FreqProxTermsWriter extends TermsHashConsumer { -> IMPL: FormatPostingsPositionsWriter */ - int start = 0; - while(start < numAllFields) { - final FieldInfo fieldInfo = allFields.get(start).fieldInfo; - final String fieldName = fieldInfo.name; + for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { + final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo; - int end = start+1; - while(end < numAllFields && allFields.get(end).fieldInfo.name.equals(fieldName)) - end++; - - FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start]; - for(int i=start;i> entry : threadsAndFields.entrySet()) { - FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + if (termsHash != null) { + termsHash.reset(); } consumer.close(); } BytesRef payload; - /* Walk through all unique text tokens (Posting - * instances) found in this field and serialize them - * into a single RAM segment. 
*/ - void appendPostings(String fieldName, SegmentWriteState state, - FreqProxTermsWriterPerField[] fields, - FieldsConsumer consumer) - throws CorruptIndexException, IOException { + @Override + public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { + return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); + } - int numFields = fields.length; + @Override + void finishDocument(TermsHash termsHash) throws IOException { + } - final BytesRef text = new BytesRef(); - - final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; - - final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo); - final Comparator termComp = termsConsumer.getComparator(); - - for(int i=0;i 0; if (omitTermFreqAndPositions) { @@ -169,7 +177,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem } } } - + @Override ParallelPostingsArray createPostingsArray(int size) { return new FreqProxPostingsArray(size); @@ -212,7 +220,180 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT; } } - + public void abort() {} + + BytesRef payload; + + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. */ + void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state) + throws CorruptIndexException, IOException { + + final TermsConsumer termsConsumer = consumer.addField(fieldInfo); + final Comparator termComp = termsConsumer.getComparator(); + + final Term protoTerm = new Term(fieldName); + + final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + + final Map segDeletes; + if (state.segDeletes != null && state.segDeletes.terms.size() > 0) { + segDeletes = state.segDeletes.terms; + } else { + segDeletes = null; + } + + final int[] termIDs = termsHashPerField.sortPostings(termComp); + final int numTerms = termsHashPerField.bytesHash.size(); + final BytesRef text = new BytesRef(); + final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; + final ByteSliceReader freq = new ByteSliceReader(); + final ByteSliceReader prox = new ByteSliceReader(); + + long sumTotalTermFreq = 0; + for (int i = 0; i < numTerms; i++) { + final int termID = termIDs[i]; + // Get BytesRef + final int textStart = postings.textStarts[termID]; + termsHashPerField.bytePool.setBytesRef(text, textStart); + + termsHashPerField.initReader(freq, termID, 0); + if (!fieldInfo.omitTermFreqAndPositions) { + termsHashPerField.initReader(prox, termID, 1); + } + + // TODO: really TermsHashPerField should take over most + // of this loop, including merge sort of terms from + // multiple threads and interacting with the + // TermsConsumer, only calling out to us (passing us the + // DocsConsumer) to handle delivery of docs/positions + + final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + + final int delDocLimit; + if (segDeletes != null) { + final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text)); + if (docIDUpto != null) { + delDocLimit = docIDUpto; + } else { + delDocLimit = 0; + } + } else { + delDocLimit = 0; + } + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. 
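+ // The loop below drains this term's two byte-slice streams one document at
+ // a time: stream 0 (freq) supplies the docID delta and, unless
+ // omitTermFreqAndPositions is set, the term frequency, while stream 1 (prox)
+ // supplies positions and payloads for the PostingsConsumer. Documents with a
+ // docID below delDocLimit are also marked in the segment's deleted docs,
+ // although their postings are still written.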
+ int numDocs = 0; + long totTF = 0; + int docID = 0; + int termFreq = 0; + + while(true) { + if (freq.eof()) { + if (postings.lastDocCodes[termID] != -1) { + // Return last doc + docID = postings.lastDocIDs[termID]; + if (!omitTermFreqAndPositions) { + termFreq = postings.docFreqs[termID]; + } + postings.lastDocCodes[termID] = -1; + } else { + // EOF + break; + } + } else { + final int code = freq.readVInt(); + if (omitTermFreqAndPositions) { + docID += code; + } else { + docID += code >>> 1; + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq.readVInt(); + } + } + + assert docID != postings.lastDocIDs[termID]; + } + + numDocs++; + assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs; + final int termDocFreq = termFreq; + + // NOTE: we could check here if the docID was + // deleted, and skip it. However, this is somewhat + // dangerous because it can yield non-deterministic + // behavior since we may see the docID before we see + // the term that caused it to be deleted. This + // would mean some (but not all) of its postings may + // make it into the index, which'd alter the docFreq + // for those terms. We could fix this by doing two + // passes, ie first sweep marks all del docs, and + // 2nd sweep does the real flush, but I suspect + // that'd add too much time to flush. + postingsConsumer.startDoc(docID, termDocFreq); + if (docID < delDocLimit) { + // Mark it deleted. TODO: we could also skip + // writing its postings; this would be + // deterministic (just for this Term's docs). + if (state.deletedDocs == null) { + state.deletedDocs = new BitVector(state.numDocs); + } + state.deletedDocs.set(docID); + } + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. + if (!currentFieldOmitTermFreqAndPositions) { + // omitTermFreqAndPositions == false so we do write positions & + // payload + int position = 0; + totTF += termDocFreq; + for(int j=0;j> 1; + + final int payloadLength; + final BytesRef thisPayload; + + if ((code & 1) != 0) { + // This position has a payload + payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); + } + + prox.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + thisPayload = payload; + + } else { + payloadLength = 0; + thisPayload = null; + } + + postingsConsumer.addPosition(position, thisPayload); + } + + postingsConsumer.finishDoc(); + } + } + termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); + sumTotalTermFreq += totTF; + } + + termsConsumer.finish(sumTotalTermFreq); + } + } diff --git a/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java index b54213966ac..8ff3142e6ef 100644 --- a/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java +++ b/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java @@ -52,9 +52,15 @@ class FrozenBufferedDeletes { final int[] queryLimits; final int bytesUsed; final int numTermDeletes; - final long gen; + private long gen = -1; // assigned by BufferedDeletesStream once pushed + final boolean isSegmentPrivate; // set to true iff this frozen packet represents + // a segment private deletes. 
in that case is should + // only have Queries - public FrozenBufferedDeletes(BufferedDeletes deletes, long gen) { + + public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) { + this.isSegmentPrivate = isSegmentPrivate; + assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries"; terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]); queries = new Query[deletes.queries.size()]; queryLimits = new int[deletes.queries.size()]; @@ -66,8 +72,17 @@ class FrozenBufferedDeletes { } bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY; numTermDeletes = deletes.numTermDeletes.get(); + } + + public void setDelGen(long gen) { + assert this.gen == -1; this.gen = gen; } + + public long delGen() { + assert gen != -1; + return gen; + } public Iterable termsIterable() { return new Iterable() { diff --git a/lucene/src/java/org/apache/lucene/index/Healthiness.java b/lucene/src/java/org/apache/lucene/index/Healthiness.java new file mode 100644 index 00000000000..dcb9868ab0d --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/Healthiness.java @@ -0,0 +1,121 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.concurrent.locks.AbstractQueuedSynchronizer; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; + +/** + * Controls the health status of a {@link DocumentsWriter} sessions. This class + * used to block incoming indexing threads if flushing significantly slower than + * indexing to ensure the {@link DocumentsWriter}s healthiness. If flushing is + * significantly slower than indexing the net memory used within an + * {@link IndexWriter} session can increase very quickly and easily exceed the + * JVM's available memory. + *

+ * To prevent OOM Errors and ensure IndexWriter's stability this class blocks + * incoming threads from indexing once 2 x number of available + * {@link ThreadState}s in {@link DocumentsWriterPerThreadPool} is exceeded. + * Once flushing catches up and the number of flushing DWPT is equal or lower + * than the number of active {@link ThreadState}s threads are released and can + * continue indexing. + */ +//TODO: rename this to DocumentsWriterStallControl (or something like that)? +final class Healthiness { + + @SuppressWarnings("serial") + private static final class Sync extends AbstractQueuedSynchronizer { + volatile boolean hasBlockedThreads = false; // only with assert + + Sync() { + setState(0); + } + + boolean isHealthy() { + return getState() == 0; + } + + boolean trySetStalled() { + int state = getState(); + return compareAndSetState(state, state + 1); + } + + boolean tryReset() { + final int oldState = getState(); + if (oldState == 0) + return true; + if (compareAndSetState(oldState, 0)) { + releaseShared(0); + return true; + } + return false; + } + + @Override + public int tryAcquireShared(int acquires) { + assert maybeSetHasBlocked(getState()); + return getState() == 0 ? 1 : -1; + } + + // only used for testing + private boolean maybeSetHasBlocked(int state) { + hasBlockedThreads |= getState() != 0; + return true; + } + + @Override + public boolean tryReleaseShared(int newState) { + return (getState() == 0); + } + } + + private final Sync sync = new Sync(); + volatile boolean wasStalled = false; // only with asserts + + boolean anyStalledThreads() { + return !sync.isHealthy(); + } + + /** + * Update the stalled flag status. This method will set the stalled flag to + * true iff the number of flushing + * {@link DocumentsWriterPerThread} is greater than the number of active + * {@link DocumentsWriterPerThread}. Otherwise it will reset the + * {@link Healthiness} to healthy and release all threads waiting on + * {@link #waitIfStalled()} + */ + void updateStalled(DocumentsWriterFlushControl flushControl) { + do { + // if we have more flushing DWPT than numActiveDWPT we stall! + while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) { + if (sync.trySetStalled()) { + assert wasStalled = true; + return; + } + } + } while (!sync.tryReset()); + } + + void waitIfStalled() { + sync.acquireShared(0); + } + + boolean hasBlocked() { + return sync.hasBlockedThreads; + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java index 5d2f959bc65..ecf41bacabc 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java @@ -21,7 +21,13 @@ import java.io.FileNotFoundException; import java.io.FilenameFilter; import java.io.IOException; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; @@ -49,12 +55,12 @@ import org.apache.lucene.util.CollectionUtil; * (IndexDeletionPolicy) is consulted on creation (onInit) * and once per commit (onCommit), to decide when a commit * should be removed. - * + * * It is the business of the IndexDeletionPolicy to choose * when to delete commit points. 
The actual mechanics of * file deletion, retrying, etc, derived from the deletion * of commit points is the business of the IndexFileDeleter. - * + * * The current default deletion policy is {@link * KeepOnlyLastCommitDeletionPolicy}, which removes all * prior commits when a new commit has completed. This @@ -72,7 +78,7 @@ final class IndexFileDeleter { * so we will retry them again later: */ private List deletable; - /* Reference count for all files in the index. + /* Reference count for all files in the index. * Counts how many existing commits reference a file. **/ private Map refCounts = new HashMap(); @@ -88,7 +94,7 @@ final class IndexFileDeleter { * non-commit checkpoint: */ private List> lastFiles = new ArrayList>(); - /* Commits that the IndexDeletionPolicy have decided to delete: */ + /* Commits that the IndexDeletionPolicy have decided to delete: */ private List commitsToDelete = new ArrayList(); private PrintStream infoStream; @@ -108,7 +114,7 @@ final class IndexFileDeleter { message("setInfoStream deletionPolicy=" + policy); } } - + private void message(String message) { infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } @@ -139,12 +145,12 @@ final class IndexFileDeleter { // counts: long currentGen = segmentInfos.getGeneration(); indexFilenameFilter = new IndexFileNameFilter(codecs); - + CommitPoint currentCommitPoint = null; String[] files = null; try { files = directory.listAll(); - } catch (NoSuchDirectoryException e) { + } catch (NoSuchDirectoryException e) { // it means the directory is empty, so ignore it. files = new String[0]; } @@ -152,7 +158,7 @@ final class IndexFileDeleter { for (String fileName : files) { if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { - + // Add this file to refCounts with initial count 0: getRefCount(fileName); @@ -233,7 +239,7 @@ final class IndexFileDeleter { // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. - for(Map.Entry entry : refCounts.entrySet() ) { + for(Map.Entry entry : refCounts.entrySet() ) { RefCount rc = entry.getValue(); final String fileName = entry.getKey(); if (0 == rc.count) { @@ -253,7 +259,7 @@ final class IndexFileDeleter { // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit checkpoint(segmentInfos, false); - + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); deleteCommits(); @@ -327,7 +333,7 @@ final class IndexFileDeleter { segmentPrefix1 = null; segmentPrefix2 = null; } - + for(int i=0;i oldDeletable = deletable; @@ -397,7 +403,7 @@ final class IndexFileDeleter { /** * For definition of "check point" see IndexWriter comments: * "Clarification: Check Points (and commits)". 
- * + * * Writer calls this when it has made a "consistent * change" to the index, meaning new files are written to * the index and the in-memory SegmentInfos have been @@ -417,7 +423,7 @@ final class IndexFileDeleter { public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { if (infoStream != null) { - message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); + message("now checkpoint \"" + segmentInfos + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index ae49b504868..984f77b7117 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.*; import org.apache.lucene.util.ArrayUtil; @@ -923,6 +924,22 @@ public abstract class IndexReader implements Cloneable,Closeable { } } + /** + * Returns true if an index exists at the specified directory. + * @param directory the directory to check for an index + * @param codecProvider provides a CodecProvider in case the index uses non-core codecs + * @return true if an index exists; false otherwise + * @throws IOException if there is a problem with accessing the index + */ + public static boolean indexExists(Directory directory, CodecProvider codecProvider) throws IOException { + try { + new SegmentInfos().read(directory, codecProvider); + return true; + } catch (IOException ioe) { + return false; + } + } + /** Returns the number of documents in this index. */ public abstract int numDocs(); @@ -1051,6 +1068,9 @@ public abstract class IndexReader implements Cloneable,Closeable { * using {@link ReaderUtil#gatherSubReaders} and iterate * through them yourself. 
*/ public abstract Fields fields() throws IOException; + + // nocommit javadoc + public abstract PerDocValues perDocValues() throws IOException; public int docFreq(Term term) throws IOException { return docFreq(term.field(), term.bytes()); @@ -1554,11 +1574,11 @@ public abstract class IndexReader implements Cloneable,Closeable { } public DocValues docValues(String field) throws IOException { - final Fields fields = fields(); - if (fields == null) { + final PerDocValues perDoc = perDocValues(); + if (perDoc == null) { return null; } - return fields.docValues(field); + return perDoc.docValues(field); } private volatile Fields fields; @@ -1572,6 +1592,19 @@ public abstract class IndexReader implements Cloneable,Closeable { Fields retrieveFields() { return fields; } + + private volatile PerDocValues perDocValues; + + /** @lucene.internal */ + void storePerDoc(PerDocValues perDocValues) { + this.perDocValues = perDocValues; + } + + /** @lucene.internal */ + PerDocValues retrievePerDoc() { + return perDocValues; + } + /** * A struct like class that represents a hierarchical relationship between diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index cedd1990905..166a6d594dd 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -35,6 +35,7 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor; @@ -46,6 +47,7 @@ import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.ThreadInterruptedException; @@ -54,17 +56,16 @@ import org.apache.lucene.util.MapBackedSet; /** An IndexWriter creates and maintains an index. -

The create argument to the {@link - #IndexWriter(Directory, IndexWriterConfig) constructor} determines +

The {@link OpenMode} option on + {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new index is created, or whether an existing index is - opened. Note that you can open an index with create=true - even while readers are using the index. The old readers will + opened. Note that you can open an index with {@link OpenMode#CREATE} + even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, - and won't see the newly created index until they re-open. There are - also {@link #IndexWriter(Directory, IndexWriterConfig) constructors} - with no create argument which will create a new index - if there is not already an index at the provided path and otherwise - open the existing index.

+ and won't see the newly created index until they re-open. If + {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a + new index if there is not already an index at the provided path + and otherwise open the existing index.
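For reference, a minimal configuration sketch of the open-mode behavior described above and of the flush triggers discussed in the following paragraphs; it is not part of the patch, and the index path, analyzer, Version constant and buffer sizes are placeholder assumptions (usual org.apache.lucene.* imports assumed):

    Directory dir = FSDirectory.open(new File("/path/to/index"));        // placeholder path
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);          // placeholder analyzer/version
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);        // create a new index only if none exists yet
    conf.setRAMBufferSizeMB(64.0);                      // flush new segments by RAM usage (preferred)
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);   // don't also flush by doc count
    conf.setMaxBufferedDeleteTerms(1000);               // apply buffered deletes after 1000 delete terms
    IndexWriter writer = new IndexWriter(dir, conf);
    // later: writer.addDocument(doc); writer.commit(); writer.close();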

In either case, documents are added with {@link #addDocument(Document) addDocument} and removed with {@link #deleteDocuments(Term)} or {@link @@ -76,15 +77,19 @@ import org.apache.lucene.util.MapBackedSet;

These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method - calls). A flush is triggered when there are enough - buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) - or enough added documents since the last flush, whichever - is sooner. For the added documents, flushing is triggered - either by RAM usage of the documents (see {@link - IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents. - The default is to flush when RAM usage hits 16 MB. For + calls). A flush is triggered when there are enough added documents + since the last flush. Flushing is triggered either by RAM usage of the + documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the + number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). + The default is to flush when RAM usage hits + {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing speed you should flush by RAM usage with a - large RAM buffer. Note that flushing just moves the + large RAM buffer. Additionally, if IndexWriter reaches the configured number of + buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) + the deleted terms and queries are flushed and applied to existing segments. + In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and + {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms + won't trigger a segment flush. Note that flushing just moves the internal buffered state in IndexWriter into the index, but these changes are not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may @@ -165,21 +170,21 @@ import org.apache.lucene.util.MapBackedSet; /* * Clarification: Check Points (and commits) * IndexWriter writes new index files to the directory without writing a new segments_N - * file which references these new files. It also means that the state of + * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. - * - * Each time the SegmentInfos is changed, and matches the (possibly - * modified) directory files, we have a new "check point". - * If the modified/new SegmentInfos is written to disk - as a new - * (generation of) segments_N file - this check point is also an + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) segments_N file - this check point is also an * IndexCommit. - * - * A new checkpoint always replaces the previous checkpoint and - * becomes the new "front" of the index. This allows the IndexFileDeleter + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer - * referenced by the "front" of the index). For this, IndexFileDeleter + * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ public class IndexWriter implements Closeable { @@ -195,7 +200,7 @@ public class IndexWriter implements Closeable { * printed to infoStream, if set (see {@link * #setInfoStream}). 
*/ - public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH_UTF8; + public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield @@ -225,7 +230,7 @@ public class IndexWriter implements Closeable { final FieldNumberBiMap globalFieldNumberMap; private DocumentsWriter docWriter; - private IndexFileDeleter deleter; + final IndexFileDeleter deleter; private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization private int optimizeMaxNumSegments; @@ -247,12 +252,12 @@ public class IndexWriter implements Closeable { private long mergeGen; private boolean stopMerges; - private final AtomicInteger flushCount = new AtomicInteger(); - private final AtomicInteger flushDeletesCount = new AtomicInteger(); + final AtomicInteger flushCount = new AtomicInteger(); + final AtomicInteger flushDeletesCount = new AtomicInteger(); final ReaderPool readerPool = new ReaderPool(); final BufferedDeletesStream bufferedDeletesStream; - + // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's @@ -339,31 +344,58 @@ public class IndexWriter implements Closeable { */ IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); - + final long tStart = System.currentTimeMillis(); if (infoStream != null) { message("flush at getReader"); } - // Do this up front before flushing so that the readers // obtained during this flush are pooled, the first time // this method is called: poolReaders = true; - - // Prevent segmentInfos from changing while opening the - // reader; in theory we could do similar retry logic, - // just like we do when loading segments_N - IndexReader r; - synchronized(this) { - flush(false, applyAllDeletes); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); - if (infoStream != null) { - message("return reader version=" + r.getVersion() + " reader=" + r); + final IndexReader r; + doBeforeFlush(); + final boolean anySegmentFlushed; + /* + * for releasing a NRT reader we must ensure that + * DW doesn't add any segments or deletes until we are + * done with creating the NRT DirectoryReader. + * We release the two stage full flush after we are done opening the + * directory reader! + */ + synchronized (fullFlushLock) { + boolean success = false; + try { + anySegmentFlushed = docWriter.flushAllThreads(); + if (!anySegmentFlushed) { + // prevent double increment since docWriter#doFlush increments the flushcount + // if we flushed anything. + flushCount.incrementAndGet(); + } + success = true; + // Prevent segmentInfos from changing while opening the + // reader; in theory we could do similar retry logic, + // just like we do when loading segments_N + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); + if (infoStream != null) { + message("return reader version=" + r.getVersion() + " reader=" + r); + } + } + } finally { + if (!success && infoStream != null) { + message("hit exception during while NRT reader"); + } + // Done: finish the full flush! 
+ docWriter.finishFullFlush(success); + doAfterFlush(); } } - maybeMerge(); - + if (anySegmentFlushed) { + maybeMerge(); + } if (infoStream != null) { message("getReader took " + (System.currentTimeMillis() - tStart) + " msec"); } @@ -400,10 +432,10 @@ public class IndexWriter implements Closeable { if (r != null) { r.hasChanges = false; } - } + } } } - + // used only by asserts public synchronized boolean infoIsLive(SegmentInfo info) { int idx = segmentInfos.indexOf(info); @@ -419,7 +451,7 @@ public class IndexWriter implements Closeable { } return info; } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -432,7 +464,7 @@ public class IndexWriter implements Closeable { public synchronized boolean release(SegmentReader sr) throws IOException { return release(sr, false); } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -493,7 +525,7 @@ public class IndexWriter implements Closeable { sr.close(); } } - + /** Remove all our references to readers, and commits * any pending changes. */ synchronized void close() throws IOException { @@ -503,7 +535,7 @@ public class IndexWriter implements Closeable { Iterator> iter = readerMap.entrySet().iterator(); while (iter.hasNext()) { - + Map.Entry ent = iter.next(); SegmentReader sr = ent.getValue(); @@ -526,7 +558,7 @@ public class IndexWriter implements Closeable { sr.decRef(); } } - + /** * Commit all segment reader in the pool. * @throws IOException @@ -550,7 +582,7 @@ public class IndexWriter implements Closeable { } } } - + /** * Returns a ref to a clone. NOTE: this clone is not * enrolled in the pool, so you should simply close() @@ -564,7 +596,7 @@ public class IndexWriter implements Closeable { sr.decRef(); } } - + /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} @@ -580,7 +612,7 @@ public class IndexWriter implements Closeable { /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} - * + * * @see #release(SegmentReader) * @param info * @param doOpenStores @@ -638,7 +670,7 @@ public class IndexWriter implements Closeable { return sr; } } - + /** * Obtain the number of deleted docs for a pooled reader. 
* If the reader isn't being pooled, the segmentInfo's @@ -658,7 +690,7 @@ public class IndexWriter implements Closeable { } } } - + /** * Used internally to throw an {@link * AlreadyClosedException} if this IndexWriter has been @@ -721,7 +753,7 @@ public class IndexWriter implements Closeable { mergePolicy.setIndexWriter(this); mergeScheduler = conf.getMergeScheduler(); codecs = conf.getCodecProvider(); - + bufferedDeletesStream = new BufferedDeletesStream(messageID); bufferedDeletesStream.setInfoStream(infoStream); poolReaders = conf.getReaderPooling(); @@ -790,8 +822,7 @@ public class IndexWriter implements Closeable { // start with previous field numbers, but new FieldInfos globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory); - docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(), - globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream); + docWriter = new DocumentsWriter(config, directory, this, globalFieldNumberMap, bufferedDeletesStream); docWriter.setInfoStream(infoStream); // Default deleter (for backwards compatibility) is @@ -849,7 +880,7 @@ public class IndexWriter implements Closeable { public IndexWriterConfig getConfig() { return config; } - + /** If non-null, this will be the default infoStream used * by a newly instantiated IndexWriter. * @see #setInfoStream @@ -871,7 +902,7 @@ public class IndexWriter implements Closeable { * message when maxFieldLength is reached will be printed * to this. */ - public void setInfoStream(PrintStream infoStream) { + public void setInfoStream(PrintStream infoStream) throws IOException { ensureOpen(); this.infoStream = infoStream; docWriter.setInfoStream(infoStream); @@ -881,7 +912,7 @@ public class IndexWriter implements Closeable { messageState(); } - private void messageState() { + private void messageState() throws IOException { message("\ndir=" + directory + "\n" + "index=" + segString() + "\n" + "version=" + Constants.LUCENE_VERSION + "\n" + @@ -901,7 +932,7 @@ public class IndexWriter implements Closeable { public boolean verbose() { return infoStream != null; } - + /** * Commits all changes to an index and closes all * associated files. Note that this may be a costly @@ -916,7 +947,7 @@ public class IndexWriter implements Closeable { * even though part of it (flushing buffered documents) * may have succeeded, so the write lock will still be * held.

- * + * *

If you can correct the underlying cause (eg free up * some disk space) then you can call close() again. * Failing that, if you want to force the write lock to be @@ -1036,7 +1067,7 @@ public class IndexWriter implements Closeable { if (infoStream != null) message("now call final commit()"); - + if (!hitOOM) { commitInternal(null); } @@ -1049,7 +1080,7 @@ public class IndexWriter implements Closeable { docWriter = null; deleter.close(); } - + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -1072,7 +1103,7 @@ public class IndexWriter implements Closeable { } /** Returns the Directory used by this index. */ - public Directory getDirectory() { + public Directory getDirectory() { // Pass false because the flush during closing calls getDirectory ensureOpen(false); return directory; @@ -1196,22 +1227,7 @@ public class IndexWriter implements Closeable { * @throws IOException if there is a low-level IO error */ public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - ensureOpen(); - boolean doFlush = false; - boolean success = false; - try { - try { - doFlush = docWriter.updateDocument(doc, analyzer, null); - success = true; - } finally { - if (!success && infoStream != null) - message("hit exception adding document"); - } - if (doFlush) - flush(true, false); - } catch (OutOfMemoryError oom) { - handleOOM(oom, "addDocument"); - } + updateDocument(null, doc, analyzer); } /** @@ -1228,9 +1244,7 @@ public class IndexWriter implements Closeable { public void deleteDocuments(Term term) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteTerm(term, false)) { - flush(true, false); - } + docWriter.deleteTerms(term); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Term)"); } @@ -1238,7 +1252,8 @@ public class IndexWriter implements Closeable { /** * Deletes the document(s) containing any of the - * terms. All deletes are flushed at the same time. + * terms. All given deletes are applied and flushed atomically + * at the same time. * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See (segmentInfos); optimizeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges as optimize // merge: for(final MergePolicy.OneMerge merge : pendingMerges) { @@ -1612,12 +1622,12 @@ public class IndexWriter implements Closeable { if (merge.optimize) return true; } - + for (final MergePolicy.OneMerge merge : runningMerges) { if (merge.optimize) return true; } - + return false; } @@ -1640,6 +1650,8 @@ public class IndexWriter implements Closeable { throws CorruptIndexException, IOException { ensureOpen(); + flush(true, true); + if (infoStream != null) message("expungeDeletes: index now " + segString()); @@ -1712,6 +1724,10 @@ public class IndexWriter implements Closeable { * documents, so you must do so yourself if necessary. * See also {@link #expungeDeletes(boolean)} * + *

NOTE: this method first flushes a new + * segment (if there are indexed documents), and applies + * all buffered deletes. + * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.
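A short usage sketch of the delete path described above, where the given delete terms are applied and flushed atomically and expungeDeletes first flushes pending documents and applies buffered deletes; the writer variable and the "id" field come from an assumed setup like the configuration sketch earlier:

    // buffer several delete terms; they are applied and flushed together
    writer.deleteDocuments(new Term("id", "12"), new Term("id", "13"));

    // reclaim space from deleted documents; per the note above this first
    // flushes any buffered documents and applies all buffered deletes
    writer.expungeDeletes();

    // nothing is visible to readers until commit() (or close()) is called
    writer.commit();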

@@ -1908,7 +1924,7 @@ public class IndexWriter implements Closeable { /** * Delete all documents in the index. * - *

This method will drop all buffered documents and will + *

This method will drop all buffered documents and will * remove all segments from the index. This change will not be * visible until a {@link #commit()} has been called. This method * can be rolled back using {@link #rollback()}.
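A sketch of the deleteAll semantics described above, again assuming an already-open writer:

    writer.deleteAll();    // drops all buffered documents and marks every segment for removal
    // the change is invisible to readers until committed:
    writer.commit();
    // alternatively the change (and any other uncommitted changes) can be undone
    // with writer.rollback(), which also closes the writer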

@@ -1938,7 +1954,7 @@ public class IndexWriter implements Closeable { deleter.refresh(); // Don't bother saving any changes in our segmentInfos - readerPool.clear(null); + readerPool.clear(null); // Mark that the index has changed ++changeCount; @@ -1965,7 +1981,7 @@ public class IndexWriter implements Closeable { mergeFinish(merge); } pendingMerges.clear(); - + for (final MergePolicy.OneMerge merge : runningMerges) { if (infoStream != null) message("now abort running merge " + merge.segString(directory)); @@ -1992,7 +2008,7 @@ public class IndexWriter implements Closeable { message("all running merges have aborted"); } else { - // waitForMerges() will ensure any running addIndexes finishes. + // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an @@ -2004,7 +2020,7 @@ public class IndexWriter implements Closeable { /** * Wait for any currently outstanding merges to finish. * - *

It is guaranteed that any merges started prior to calling this method + *

It is guaranteed that any merges started prior to calling this method * will have completed once this method completes.

*/ public synchronized void waitForMerges() { @@ -2034,6 +2050,125 @@ public class IndexWriter implements Closeable { deleter.checkpoint(segmentInfos, false); } + /** + * Prepares the {@link SegmentInfo} for the new flushed segment and persists + * the deleted documents {@link BitVector}. Use + * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to + * publish the returned {@link SegmentInfo} together with its segment private + * delete packet. + * + * @see #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes) + */ + SegmentInfo prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException { + assert flushedSegment != null; + + SegmentInfo newSegment = flushedSegment.segmentInfo; + + setDiagnostics(newSegment, "flush"); + + boolean success = false; + try { + if (useCompoundFile(newSegment)) { + String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + message("creating compound file " + compoundFileName); + // Now build compound file + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); + for(String fileName : newSegment.files()) { + cfsWriter.addFile(fileName); + } + + // Perform the merge + cfsWriter.close(); + synchronized(this) { + deleter.deleteNewFiles(newSegment.files()); + } + + newSegment.setUseCompoundFile(true); + } + + // Must write deleted docs after the CFS so we don't + // slurp the del file into CFS: + if (flushedSegment.deletedDocuments != null) { + final int delCount = flushedSegment.deletedDocuments.count(); + assert delCount > 0; + newSegment.setDelCount(delCount); + newSegment.advanceDelGen(); + final String delFileName = newSegment.getDelFileName(); + if (infoStream != null) { + message("flush: write " + delCount + " deletes to " + delFileName); + } + boolean success2 = false; + try { + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. + flushedSegment.deletedDocuments.write(directory, delFileName); + success2 = true; + } finally { + if (!success2) { + try { + directory.deleteFile(delFileName); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } + } + + success = true; + } finally { + if (!success) { + if (infoStream != null) { + message("hit exception " + + "reating compound file for newly flushed segment " + newSegment.name); + } + + synchronized(this) { + deleter.refresh(newSegment.name); + } + } + } + return newSegment; + } + + /** + * Atomically adds the segment private delete packet and publishes the flushed + * segments SegmentInfo to the index writer. NOTE: use + * {@link #prepareFlushedSegment(FlushedSegment)} to obtain the + * {@link SegmentInfo} for the flushed segment. + * + * @see #prepareFlushedSegment(FlushedSegment) + */ + synchronized void publishFlushedSegment(SegmentInfo newSegment, + FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException { + // Lock order IW -> BDS + synchronized (bufferedDeletesStream) { + if (globalPacket != null && globalPacket.any()) { + bufferedDeletesStream.push(globalPacket); + } + // Publishing the segment must be synched on IW -> BDS to make the sure + // that no merge prunes away the seg. 
private delete packet + final long nextGen; + if (packet != null && packet.any()) { + nextGen = bufferedDeletesStream.push(packet); + } else { + // Since we don't have a delete packet to apply we can get a new + // generation right away + nextGen = bufferedDeletesStream.getNextGen(); + } + newSegment.setBufferedDeletesGen(nextGen); + segmentInfos.add(newSegment); + checkpoint(); + } + } + + synchronized boolean useCompoundFile(SegmentInfo segmentInfo) throws IOException { + return mergePolicy.useCompoundFile(segmentInfos, segmentInfo); + } + private synchronized void resetMergeExceptions() { mergeExceptions = new ArrayList(); mergeGen++; @@ -2082,11 +2217,11 @@ public class IndexWriter implements Closeable { *

* NOTE: this method only copies the segments of the incoming indexes * and does not merge them. Therefore deleted documents are not removed and - * the new segments are not merged with the existing ones. Also, the segments - * are copied as-is, meaning they are not converted to CFS if they aren't, - * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} + * the new segments are not merged with the existing ones. Also, the segments + * are copied as-is, meaning they are not converted to CFS if they aren't, + * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} * or {@link #optimize} afterwards. - * + * *

This requires this index not be among those to be added. * *
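A usage sketch of the copy-only addIndexes behavior described above; the source directories are placeholders, and the writer is assumed open on its own, separate directory:

    Directory part1 = FSDirectory.open(new File("/path/to/other-index-1"));   // placeholder paths
    Directory part2 = FSDirectory.open(new File("/path/to/other-index-2"));

    // copies the source segments as-is: no merging, and deleted documents are kept
    writer.addIndexes(part1, part2);

    // optionally fold the copied segments into the existing ones afterwards
    writer.maybeMerge();       // or writer.optimize()
    writer.commit();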

@@ -2123,7 +2258,7 @@ public class IndexWriter implements Closeable { docCount += info.docCount; String newSegName = newSegmentName(); String dsName = info.getDocStoreSegment(); - + if (infoStream != null) { message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); } @@ -2170,7 +2305,7 @@ public class IndexWriter implements Closeable { infos.add(info); } - } + } synchronized (this) { ensureOpen(); @@ -2211,15 +2346,20 @@ public class IndexWriter implements Closeable { ensureOpen(); try { + if (infoStream != null) + message("flush at addIndexes(IndexReader...)"); + flush(false, true); + String mergedName = newSegmentName(); SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, null, codecs, payloadProcessorProvider, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs))); - + for (IndexReader reader : readers) // add new indexes merger.add(reader); - + int docCount = merger.merge(); // merge 'em + final FieldInfos fieldInfos = merger.fieldInfos(); SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), @@ -2231,11 +2371,11 @@ public class IndexWriter implements Closeable { synchronized(this) { // Guard segmentInfos useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info); } - + // Now create the compound file if needed if (useCompoundFile) { merger.createCompoundFile(mergedName + ".cfs", info); - + // delete new non cfs files directly: they were never // registered with IFD deleter.deleteNewFiles(info.files()); @@ -2287,7 +2427,7 @@ public class IndexWriter implements Closeable { * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.

- * + * * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. @@ -2431,6 +2571,10 @@ public class IndexWriter implements Closeable { } } + // Ensures only one flush() is actually flushing segments + // at a time: + private final Object fullFlushLock = new Object(); + /** * Flush all in-memory buffered updates (adds and deletes) * to the Directory. @@ -2454,116 +2598,104 @@ public class IndexWriter implements Closeable { } } - // TODO: this method should not have to be entirely - // synchronized, ie, merges should be allowed to commit - // even while a flush is happening - private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { - + private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush"); } doBeforeFlush(); - assert testPoint("startDoFlush"); - - // We may be flushing because it was triggered by doc - // count, del count, ram usage (in which case flush - // pending is already set), or we may be flushing - // due to external event eg getReader or commit is - // called (in which case we now set it, and this will - // pause all threads): - flushControl.setFlushPendingNoWait("explicit flush"); - boolean success = false; - try { if (infoStream != null) { message(" start flush: applyAllDeletes=" + applyAllDeletes); message(" index before flush " + segString()); } - - final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos); - if (newSegment != null) { - setDiagnostics(newSegment, "flush"); - segmentInfos.add(newSegment); - checkpoint(); - } - - if (!applyAllDeletes) { - // If deletes alone are consuming > 1/2 our RAM - // buffer, force them all to apply now. 
This is to - // prevent too-frequent flushing of a long tail of - // tiny segments: - if (flushControl.getFlushDeletes() || - (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH && - bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { - applyAllDeletes = true; - if (infoStream != null) { - message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); - } + final boolean anySegmentFlushed; + + synchronized (fullFlushLock) { + try { + anySegmentFlushed = docWriter.flushAllThreads(); + success = true; + } finally { + docWriter.finishFullFlush(success); } } - - if (applyAllDeletes) { - if (infoStream != null) { - message("apply all deletes during flush"); + success = false; + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + doAfterFlush(); + if (!anySegmentFlushed) { + // flushCount is incremented in flushAllThreads + flushCount.incrementAndGet(); } - flushDeletesCount.incrementAndGet(); - final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos); - if (result.anyDeletes) { - checkpoint(); - } - if (!keepFullyDeletedSegments && result.allDeleted != null) { - if (infoStream != null) { - message("drop 100% deleted segments: " + result.allDeleted); - } - for(SegmentInfo info : result.allDeleted) { - // If a merge has already registered for this - // segment, we leave it in the readerPool; the - // merge will skip merging it and will then drop - // it once it's done: - if (!mergingSegments.contains(info)) { - segmentInfos.remove(info); - if (readerPool != null) { - readerPool.drop(info); - } - } - } - checkpoint(); - } - bufferedDeletesStream.prune(segmentInfos); - assert !bufferedDeletesStream.any(); - flushControl.clearDeletes(); - } else if (infoStream != null) { - message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + success = true; + return anySegmentFlushed; } - - doAfterFlush(); - flushCount.incrementAndGet(); - - success = true; - - return newSegment != null; - } catch (OutOfMemoryError oom) { handleOOM(oom, "doFlush"); // never hit return false; } finally { - flushControl.clearFlushPending(); if (!success && infoStream != null) message("hit exception during flush"); } } + + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + if (applyAllDeletes) { + if (infoStream != null) { + message("apply all deletes during flush"); + } + applyAllDeletes(); + } else if (infoStream != null) { + message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + } + + } + + final synchronized void applyAllDeletes() throws IOException { + flushDeletesCount.incrementAndGet(); + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream + .applyDeletes(readerPool, segmentInfos); + if (result.anyDeletes) { + checkpoint(); + } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for (SegmentInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + if (readerPool != null) { + 
readerPool.drop(info); + } + } + } + checkpoint(); + } + bufferedDeletesStream.prune(segmentInfos); + } /** Expert: Return the total size of all index files currently cached in memory. * Useful for size management with flushRamDocs() */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed(); + return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed(); + } + + // for testing only + DocumentsWriter getDocsWriter() { + boolean test = false; + assert test = true; + return test?docWriter: null; } /** Expert: Return the number of documents currently @@ -2573,7 +2705,7 @@ public class IndexWriter implements Closeable { return docWriter.getNumDocs(); } - private void ensureValidMerge(MergePolicy.OneMerge merge) { + private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException { for(SegmentInfo info : merge.segments) { if (segmentInfos.indexOf(info) == -1) { throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory); @@ -2699,7 +2831,7 @@ public class IndexWriter implements Closeable { } commitMergedDeletes(merge, mergedReader); - + // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound @@ -2713,7 +2845,7 @@ public class IndexWriter implements Closeable { message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); } - final Set mergedAway = new HashSet(merge.segments); + final Set mergedAway = new HashSet(merge.segments); int segIdx = 0; int newSegIdx = 0; boolean inserted = false; @@ -2760,15 +2892,15 @@ public class IndexWriter implements Closeable { // them so that they don't bother writing them to // disk, updating SegmentInfo, etc.: readerPool.clear(merge.segments); - + if (merge.optimize) { // cascade the optimize: segmentsToOptimize.add(merge.info); } - + return true; } - + final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException { if (infoStream != null) { @@ -2857,14 +2989,14 @@ public class IndexWriter implements Closeable { /** Hook that's called when the specified merge is complete. */ void mergeSuccess(MergePolicy.OneMerge merge) { } - + /** Checks whether this merge involves any segments * already participating in a merge. If not, this merge * is "registered", meaning we record that its segments * are now participating in a merge, and true is * returned. Else (the merge conflicts) false is * returned. 
*/ - final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException { + final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException { if (merge.registerDone) return true; @@ -2874,10 +3006,8 @@ public class IndexWriter implements Closeable { throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory)); } - final int count = merge.segments.size(); boolean isExternal = false; - for(int i=0;i(); merge.readerClones = new ArrayList(); + merge.estimatedMergeBytes = 0; + // This is try/finally to make sure merger's readers are // closed: boolean success = false; @@ -3134,6 +3273,13 @@ public class IndexWriter implements Closeable { -config.getReaderTermsIndexDivisor()); merge.readers.add(reader); + final int readerMaxDoc = reader.maxDoc(); + if (readerMaxDoc > 0) { + final int delCount = reader.numDeletedDocs(); + final double delRatio = ((double) delCount)/readerMaxDoc; + merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio); + } + // We clone the segment readers because other // deletes may come in while we're merging so we // need readers that will not change @@ -3166,7 +3312,7 @@ public class IndexWriter implements Closeable { message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size()); } anyNonBulkMerges |= merger.getAnyNonBulkMerges(); - + assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; // Very important to do this before opening the reader @@ -3235,8 +3381,11 @@ public class IndexWriter implements Closeable { merge.info.setUseCompoundFile(true); } - final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); + if (infoStream != null) { + message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.)); + } + final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); final int termsIndexDivisor; final boolean loadDocStores; @@ -3297,12 +3446,12 @@ public class IndexWriter implements Closeable { // For test purposes. final int getBufferedDeleteTermsSize() { - return docWriter.getPendingDeletes().terms.size(); + return docWriter.getBufferedDeleteTermsSize(); } // For test purposes. final int getNumBufferedDeleteTerms() { - return docWriter.getPendingDeletes().numTermDeletes.get(); + return docWriter.getNumBufferedDeleteTerms(); } // utility routines for tests @@ -3310,21 +3459,41 @@ public class IndexWriter implements Closeable { return segmentInfos.size() > 0 ? 
segmentInfos.info(segmentInfos.size()-1) : null; } - public synchronized String segString() { + /** @lucene.internal */ + public synchronized String segString() throws IOException { return segString(segmentInfos); } - private synchronized String segString(SegmentInfos infos) { + /** @lucene.internal */ + public synchronized String segString(SegmentInfos infos) throws IOException { StringBuilder buffer = new StringBuilder(); final int count = infos.size(); for(int i = 0; i < count; i++) { if (i > 0) { buffer.append(' '); } - final SegmentInfo info = infos.info(i); - buffer.append(info.toString(directory, 0)); - if (info.dir != directory) - buffer.append("**"); + buffer.append(segString(infos.info(i))); + } + + return buffer.toString(); + } + + public synchronized String segString(SegmentInfo info) throws IOException { + StringBuilder buffer = new StringBuilder(); + SegmentReader reader = readerPool.getIfExists(info); + try { + if (reader != null) { + buffer.append(reader.toString()); + } else { + buffer.append(info.toString(directory, 0)); + if (info.dir != directory) { + buffer.append("**"); + } + } + } finally { + if (reader != null) { + readerPool.release(reader); + } } return buffer.toString(); } @@ -3397,17 +3566,17 @@ public class IndexWriter implements Closeable { assert lastCommitChangeCount <= changeCount; myChangeCount = changeCount; - + if (changeCount == lastCommitChangeCount) { if (infoStream != null) message(" skip startCommit(): no changes pending"); return; } - + // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() all files // referenced by toSync, in the background. - + if (infoStream != null) message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); @@ -3415,10 +3584,10 @@ public class IndexWriter implements Closeable { toSync = (SegmentInfos) segmentInfos.clone(); assert filesExist(toSync); - + if (commitUserData != null) toSync.setUserData(commitUserData); - + // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while // we are trying to sync all referenced files, a @@ -3550,7 +3719,7 @@ public class IndexWriter implements Closeable { /** Expert: remove any index files that are no longer * used. - * + * *

IndexWriter normally deletes unused files itself, * during indexing. However, on Windows, which disallows * deletion of open files, if there is a reader open on @@ -3599,7 +3768,7 @@ public class IndexWriter implements Closeable { public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) { payloadProcessorProvider = pcp; } - + /** * Returns the {@link PayloadProcessorProvider} that is used during segment * merges to process payloads. @@ -3607,124 +3776,4 @@ public class IndexWriter implements Closeable { public PayloadProcessorProvider getPayloadProcessorProvider() { return payloadProcessorProvider; } - - // decides when flushes happen - final class FlushControl { - - private boolean flushPending; - private boolean flushDeletes; - private int delCount; - private int docCount; - private boolean flushing; - - private synchronized boolean setFlushPending(String reason, boolean doWait) { - if (flushPending || flushing) { - if (doWait) { - while(flushPending || flushing) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - return false; - } else { - if (infoStream != null) { - message("now trigger flush reason=" + reason); - } - flushPending = true; - return flushPending; - } - } - - public synchronized void setFlushPendingNoWait(String reason) { - setFlushPending(reason, false); - } - - public synchronized boolean getFlushPending() { - return flushPending; - } - - public synchronized boolean getFlushDeletes() { - return flushDeletes; - } - - public synchronized void clearFlushPending() { - if (infoStream != null) { - message("clearFlushPending"); - } - flushPending = false; - flushDeletes = false; - docCount = 0; - notifyAll(); - } - - public synchronized void clearDeletes() { - delCount = 0; - } - - public synchronized boolean waitUpdate(int docInc, int delInc) { - return waitUpdate(docInc, delInc, false); - } - - public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) { - while(flushPending) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - // skipWait is only used when a thread is BOTH adding - // a doc and buffering a del term, and, the adding of - // the doc already triggered a flush - if (skipWait) { - docCount += docInc; - delCount += delInc; - return false; - } - - final int maxBufferedDocs = config.getMaxBufferedDocs(); - if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (docCount+docInc) >= maxBufferedDocs) { - return setFlushPending("maxBufferedDocs", true); - } - docCount += docInc; - - final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms(); - if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (delCount+delInc) >= maxBufferedDeleteTerms) { - flushDeletes = true; - return setFlushPending("maxBufferedDeleteTerms", true); - } - delCount += delInc; - - return flushByRAMUsage("add delete/doc"); - } - - public synchronized boolean flushByRAMUsage(String reason) { - final double ramBufferSizeMB = config.getRAMBufferSizeMB(); - if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { - final long limit = (long) (ramBufferSizeMB*1024*1024); - long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - - // DocumentsWriter may be able to free up some - // RAM: - // Lock order: FC -> DW - docWriter.balanceRAM(); - - used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - return setFlushPending("ram full: " 
+ reason, false); - } - } - } - return false; - } - } - - final FlushControl flushControl = new FlushControl(); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java index 1674068491d..742043dd5cb 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -18,7 +18,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.IndexSearcher; @@ -41,7 +41,7 @@ import org.apache.lucene.util.Version; * IndexWriterConfig conf = new IndexWriterConfig(analyzer); * conf.setter1().setter2(); * - * + * * @since 3.1 */ public final class IndexWriterConfig implements Cloneable { @@ -56,7 +56,7 @@ public final class IndexWriterConfig implements Cloneable { * */ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } - + /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here @@ -77,23 +77,19 @@ public final class IndexWriterConfig implements Cloneable { /** * Default value for the write lock timeout (1,000 ms). - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long WRITE_LOCK_TIMEOUT = 1000; - /** The maximum number of simultaneous threads that may be - * indexing documents at once in IndexWriter; if more - * than this many threads arrive they will wait for - * others to finish. */ - public final static int DEFAULT_MAX_THREAD_STATES = 8; - /** Default setting for {@link #setReaderPooling}. */ public final static boolean DEFAULT_READER_POOLING = false; /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */ public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; + /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */ + public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945; /** * Sets the default (for any instance) maximum time to wait for a write lock * (in milliseconds). @@ -105,7 +101,7 @@ public final class IndexWriterConfig implements Cloneable { /** * Returns the default write lock timeout for newly instantiated * IndexWriterConfigs. 
- * + * * @see #setDefaultWriteLockTimeout(long) */ public static long getDefaultWriteLockTimeout() { @@ -127,10 +123,12 @@ public final class IndexWriterConfig implements Cloneable { private volatile IndexReaderWarmer mergedSegmentWarmer; private volatile CodecProvider codecProvider; private volatile MergePolicy mergePolicy; - private volatile int maxThreadStates; + private volatile DocumentsWriterPerThreadPool indexerThreadPool; private volatile boolean readerPooling; private volatile int readerTermsIndexDivisor; - + private volatile FlushPolicy flushPolicy; + private volatile int perThreadHardLimitMB; + private Version matchVersion; /** @@ -153,15 +151,16 @@ public final class IndexWriterConfig implements Cloneable { maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; - indexingChain = DocumentsWriter.defaultIndexingChain; + indexingChain = DocumentsWriterPerThread.defaultIndexingChain; mergedSegmentWarmer = null; codecProvider = CodecProvider.getDefault(); - mergePolicy = new LogByteSizeMergePolicy(); - maxThreadStates = DEFAULT_MAX_THREAD_STATES; + mergePolicy = new TieredMergePolicy(); readerPooling = DEFAULT_READER_POOLING; + indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(); readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; + perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; } - + @Override public Object clone() { // Shallow clone is the only thing that's possible, since parameters like @@ -186,7 +185,7 @@ public final class IndexWriterConfig implements Cloneable { this.openMode = openMode; return this; } - + /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ public OpenMode getOpenMode() { return openMode; @@ -261,7 +260,7 @@ public final class IndexWriterConfig implements Cloneable { public SimilarityProvider getSimilarityProvider() { return similarityProvider; } - + /** * Expert: set the interval between indexed terms. Large values cause less * memory to be used by IndexReader, but slow random-access to terms. Small @@ -281,7 +280,7 @@ public final class IndexWriterConfig implements Cloneable { * In particular, numUniqueTerms/interval terms are read into * memory by an IndexReader, and, on average, interval/2 terms * must be scanned for each random term access. - * + * * @see #DEFAULT_TERM_INDEX_INTERVAL * *

Takes effect immediately, but only applies to newly @@ -293,7 +292,7 @@ public final class IndexWriterConfig implements Cloneable { /** * Returns the interval between indexed terms. - * + * * @see #setTermIndexInterval(int) */ public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here @@ -331,10 +330,10 @@ public final class IndexWriterConfig implements Cloneable { this.writeLockTimeout = writeLockTimeout; return this; } - + /** * Returns allowed timeout when acquiring the write lock. - * + * * @see #setWriteLockTimeout(long) */ public long getWriteLockTimeout() { @@ -343,15 +342,16 @@ public final class IndexWriterConfig implements Cloneable { /** * Determines the minimal number of delete terms required before the buffered - * in-memory delete terms are applied and flushed. If there are documents - * buffered in memory at the time, they are merged and a new segment is - * created. - - *

Disabled by default (writer flushes by RAM usage). + * in-memory delete terms and queries are applied and flushed. + *

Disabled by default (writer flushes by RAM usage).

+ *

+ * NOTE: This setting won't trigger a segment flush. + *
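For illustration only, a minimal sketch of the delete-by-count behaviour described above; it is not part of the patch, it assumes an existing Analyzer named analyzer and a Directory named dir, and it uses the single-argument IndexWriterConfig constructor shown in the class javadoc earlier in this file:

    // Apply buffered delete terms/queries once 1000 delete terms have been buffered.
    // Per the NOTE above this does not flush a segment; documents are still
    // flushed by RAM usage (the default) or by document count.
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMaxBufferedDeleteTerms(1000);
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.deleteDocuments(new Term("id", "42")); // buffered until the threshold, a flush, or a commit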

* * @throws IllegalArgumentException if maxBufferedDeleteTerms * is enabled but smaller than 1 * @see #setRAMBufferSizeMB + * @see #setFlushPolicy(FlushPolicy) * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. @@ -366,9 +366,9 @@ public final class IndexWriterConfig implements Cloneable { } /** - * Returns the number of buffered deleted terms that will trigger a flush if - * enabled. - * + * Returns the number of buffered deleted terms that will trigger a flush of all + * buffered deletes if enabled. + * * @see #setMaxBufferedDeleteTerms(int) */ public int getMaxBufferedDeleteTerms() { @@ -380,45 +380,50 @@ public final class IndexWriterConfig implements Cloneable { * and deletions before they are flushed to the Directory. Generally for * faster indexing performance it's best to flush by RAM usage instead of * document count and use as large a RAM buffer as you can. - * *

* When this is set, the writer will flush whenever buffered documents and * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent * triggering a flush due to RAM usage. Note that if flushing by document * count is also enabled, then the flush will be triggered by whichever comes * first. - * + *

+ * The maximum RAM limit is inherently determined by the JVM's available memory. + * Yet, an {@link IndexWriter} session can consume a significantly larger amount + * of memory than the given RAM limit since this limit is just an indicator of when + * to flush memory resident documents to the Directory. Flushes are likely to happen + * concurrently while other threads are adding documents to the writer. For application + * stability, the available memory in the JVM should be significantly larger than + * the RAM buffer used for indexing. *

* NOTE: the account of RAM usage for pending deletions is only * approximate. Specifically, if you delete by Query, Lucene currently has no * way to measure the RAM usage of individual Queries so the accounting will * under-estimate and you should compensate by either calling commit() * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} - * to flush by count instead of RAM usage (each buffered delete Query counts - * as one). - * + * to flush and apply buffered deletes by count instead of RAM usage + * (for each buffered delete Query a constant number of bytes is used to estimate + * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will + * not trigger any segment flushes. *

- * NOTE: because IndexWriter uses ints when managing its - * internal storage, the absolute maximum value for this setting is somewhat - * less than 2048 MB. The precise limit depends on various factors, such as - * how large your documents are, how many fields have norms, etc., so it's - * best to set this value comfortably under 2048. - * + * NOTE: It's not guaranteed that all memory resident documents are flushed + * once this limit is exceeded. Depending on the configured {@link FlushPolicy}, only a + * subset of the buffered documents is flushed and therefore only part of the RAM + * buffer is released. *
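As a rough illustration of the sizing advice above (a sketch, not part of the patch; the 256 MB figure is arbitrary and analyzer is assumed to exist):

    // Flush by RAM usage with a 256 MB buffer. Because flushes may run concurrently
    // with indexing threads and a flush may release only part of the buffer, the JVM
    // heap should be sized well above the buffer itself.
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setRAMBufferSizeMB(256.0);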

+ * * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - * + * @see #setFlushPolicy(FlushPolicy) + * @see #setRAMPerThreadHardLimitMB(int) + * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @throws IllegalArgumentException * if ramBufferSize is enabled but non-positive, or it disables * ramBufferSize when maxBufferedDocs is already disabled + * */ public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { - if (ramBufferSizeMB > 2048.0) { - throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB - + " is too large; should be comfortably less than 2048"); - } if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) throw new IllegalArgumentException( "ramBufferSize should be > 0.0 MB when enabled"); @@ -438,22 +443,22 @@ public final class IndexWriterConfig implements Cloneable { * Determines the minimal number of documents required before the buffered * in-memory documents are flushed as a new Segment. Large values generally * give faster indexing. - * + * *

* When this is set, the writer will flush every maxBufferedDocs added * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a * flush due to number of buffered documents. Note that if flushing by RAM * usage is also enabled, then the flush will be triggered by whichever comes * first. - * + * *

* Disabled by default (writer flushes by RAM usage). - * + * *
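A short sketch of flushing by document count instead (illustration only, not part of the patch; the value 10000 is arbitrary and analyzer is assumed to exist):

    // Flush a new segment every 10,000 added documents.
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMaxBufferedDocs(10000);
    // Optionally disable RAM-based flushing so only the document count triggers flushes;
    // this is legal here because maxBufferedDocs was enabled first.
    conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);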

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @see #setRAMBufferSizeMB(double) - * + * @see #setFlushPolicy(FlushPolicy) * @throws IllegalArgumentException * if maxBufferedDocs is enabled but smaller than 2, or it disables * maxBufferedDocs when ramBufferSize is already disabled @@ -473,7 +478,7 @@ public final class IndexWriterConfig implements Cloneable { /** * Returns the number of buffered added documents that will trigger a flush if * enabled. - * + * * @see #setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() { @@ -519,32 +524,43 @@ public final class IndexWriterConfig implements Cloneable { return codecProvider; } - + /** * Returns the current MergePolicy in use by this writer. - * + * * @see #setMergePolicy(MergePolicy) */ public MergePolicy getMergePolicy() { return mergePolicy; } - /** - * Sets the max number of simultaneous threads that may be indexing documents - * at once in IndexWriter. Values < 1 are invalid and if passed - * maxThreadStates will be set to - * {@link #DEFAULT_MAX_THREAD_STATES}. - * - *

Only takes effect when IndexWriter is first created. */ - public IndexWriterConfig setMaxThreadStates(int maxThreadStates) { - this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates; + /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the + * IndexWriter to assign thread-states to incoming indexing threads. If no + * {@link DocumentsWriterPerThreadPool} is set, {@link IndexWriter} will use + * {@link ThreadAffinityDocumentsWriterThreadPool} with the maximum number of + * thread-states set to {@link DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES}. + *

+ *

+ * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with + * other {@link IndexWriter} instances once it has been initialized / associated with an + * {@link IndexWriter}. + *

+ *

+ * NOTE: This only takes effect when IndexWriter is first created.

*/ + public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { + if (threadPool == null) { + throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null"); + } + this.indexerThreadPool = threadPool; return this; } - /** Returns the max number of simultaneous threads that - * may be indexing documents at once in IndexWriter. */ - public int getMaxThreadStates() { - return maxThreadStates; + /** Returns the configured {@link DocumentsWriterPerThreadPool} instance. + * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool) + * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/ + public DocumentsWriterPerThreadPool getIndexerThreadPool() { + return this.indexerThreadPool; } /** By default, IndexWriter does not pool the @@ -572,10 +588,10 @@ * *
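An illustrative configuration of the thread pool setter added above (a sketch, not part of the patch; the no-argument ThreadAffinityDocumentsWriterThreadPool constructor is the one this patch uses for the default, and analyzer/dir are assumed to exist):

    // Supply a dedicated DocumentsWriterPerThreadPool. Per the NOTE above, the
    // instance must not be reused with another IndexWriter once it is bound.
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool());
    IndexWriter writer = new IndexWriter(dir, conf);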

Only takes effect when IndexWriter is first created. */ IndexWriterConfig setIndexingChain(IndexingChain indexingChain) { - this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain; + this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain; return this; } - + /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ IndexingChain getIndexingChain() { return indexingChain; @@ -604,6 +620,53 @@ return readerTermsIndexDivisor; } + /** + * Expert: Controls when segments are flushed to disk during indexing. + * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation; once initialized, + * the given instance is bound to this {@link IndexWriter} and should not be used with another writer. + * @see #setMaxBufferedDeleteTerms(int) + * @see #setMaxBufferedDocs(int) + * @see #setRAMBufferSizeMB(double) + */ + public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) { + this.flushPolicy = flushPolicy; + return this; + } + + /** + * Expert: Sets the maximum memory consumption per thread triggering a forced + * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed + * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has + * not been exceeded. This is a safety limit to prevent a + * {@link DocumentsWriterPerThread} from exhausting its address space due to its + * internal 32 bit signed integer based memory addressing. + * The given value must be less than 2 GB (2048 MB). + * + * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB + */ + public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) { + if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) { + throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB"); + } + this.perThreadHardLimitMB = perThreadHardLimitMB; + return this; + } + + /** + * Returns the max amount of memory each {@link DocumentsWriterPerThread} can + * consume until forcefully flushed.
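A combined sketch of the two expert settings above (illustration only, not part of the patch; MyFlushPolicy stands in for any concrete FlushPolicy implementation and is hypothetical, analyzer is assumed to exist):

    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setRAMPerThreadHardLimitMB(512);     // force-flush any single DocumentsWriterPerThread past 512 MB
    conf.setFlushPolicy(new MyFlushPolicy()); // hypothetical policy; must not be shared with another writer
    // conf.setRAMPerThreadHardLimitMB(4096); // would throw IllegalArgumentException (limit must be < 2048)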
+ * @see #setRAMPerThreadHardLimitMB(int) + */ + public int getRAMPerThreadHardLimitMB() { + return perThreadHardLimitMB; + } + /** + * @see #setFlushPolicy(FlushPolicy) + */ + public FlushPolicy getFlushPolicy() { + return flushPolicy; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -623,9 +686,13 @@ public final class IndexWriterConfig implements Cloneable { sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); sb.append("codecProvider=").append(codecProvider).append("\n"); sb.append("mergePolicy=").append(mergePolicy).append("\n"); - sb.append("maxThreadStates=").append(maxThreadStates).append("\n"); + sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); sb.append("readerPooling=").append(readerPooling).append("\n"); sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); + sb.append("flushPolicy=").append(flushPolicy).append("\n"); + sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n"); + return sb.toString(); } + } diff --git a/lucene/src/java/org/apache/lucene/index/IntBlockPool.java b/lucene/src/java/org/apache/lucene/index/IntBlockPool.java index 013c7b3248f..16093a5c34e 100644 --- a/lucene/src/java/org/apache/lucene/index/IntBlockPool.java +++ b/lucene/src/java/org/apache/lucene/index/IntBlockPool.java @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.util.Arrays; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,24 +24,24 @@ final class IntBlockPool { public int[][] buffers = new int[10][]; int bufferUpto = -1; // Which buffer we are upto - public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer + public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer public int[] buffer; // Current head buffer - public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset - final private DocumentsWriter docWriter; + final private DocumentsWriterPerThread docWriter; - public IntBlockPool(DocumentsWriter docWriter) { + public IntBlockPool(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; } public void reset() { if (bufferUpto != -1) { - if (bufferUpto > 0) - // Recycle all but the first buffer - docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto); - // Reuse first buffer + if (bufferUpto > 0) { + docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1); + Arrays.fill(buffers, 1, bufferUpto, null); + } bufferUpto = 0; intUpto = 0; intOffset = 0; @@ -57,7 +59,7 @@ final class IntBlockPool { bufferUpto++; intUpto = 0; - intOffset += DocumentsWriter.INT_BLOCK_SIZE; + intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE; } } diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java index 76ca1d7fddf..5f4a84072a8 100644 --- a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java @@ -17,20 +17,22 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.util.Collection; -import java.util.Map; import java.io.IOException; +import java.util.Map; abstract class InvertedDocConsumer { - /** Add a new thread */ - abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - /** Abort (called after hitting AbortException) */ abstract void abort(); /** Flush a new segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; + + abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + + abstract void startDocument() throws IOException; + + abstract void finishDocument() throws IOException; /** Attempt to free RAM, returning true if any RAM was * freed */ diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java index 351529f381b..2477cef5f6f 100644 --- a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java @@ -17,12 +17,13 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.util.Collection; -import java.util.Map; import java.io.IOException; +import java.util.Map; abstract class InvertedDocEndConsumer { - abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; abstract void abort(); + abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + abstract void startDocument() throws IOException; + abstract void finishDocument() throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java deleted file mode 100644 index 4b3119f30e1..00000000000 --- a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java +++ /dev/null @@ -1,25 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -abstract class InvertedDocEndConsumerPerThread { - abstract void startDocument(); - abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract void finishDocument(); - abstract void abort(); -} diff --git a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java index 669d3b0d901..1be4f26b77f 100644 --- a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java @@ -20,7 +20,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; @@ -72,7 +71,6 @@ public abstract class LogMergePolicy extends MergePolicy { // out there wrote his own LMP ... protected long maxMergeSizeForOptimize = Long.MAX_VALUE; protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - protected boolean requireContiguousMerge = false; protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; @@ -111,21 +109,6 @@ public abstract class LogMergePolicy extends MergePolicy { writer.get().message("LMP: " + message); } - /** If true, merges must be in-order slice of the - * segments. If false, then the merge policy is free to - * pick any segments. The default is false, which is - * in general more efficient than true since it gives the - * merge policy more freedom to pick closely sized - * segments. */ - public void setRequireContiguousMerge(boolean v) { - requireContiguousMerge = v; - } - - /** See {@link #setRequireContiguousMerge}. */ - public boolean getRequireContiguousMerge() { - return requireContiguousMerge; - } - /**

Returns the number of segments that are merged at * once and also controls the total number of segments * allowed to accumulate in the index.
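For context, a sketch of tuning the value described above through a writer's configuration (illustration only, not part of the patch; analyzer is assumed to exist, and 10 is the long-standing default merge factor):

    // mergeFactor bounds both how many segments are merged at once and how many
    // segments may accumulate per level before a merge is triggered.
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setMergeFactor(10);
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMergePolicy(lmp); // overrides the TieredMergePolicy default that this patch introduces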

*/ @@ -378,8 +361,6 @@ public abstract class LogMergePolicy extends MergePolicy { return null; } - // TODO: handle non-contiguous merge case differently? - // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed // since optimize started): @@ -499,14 +480,6 @@ public abstract class LogMergePolicy extends MergePolicy { } } - private static class SortByIndex implements Comparator { - public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) { - return o1.index - o2.index; - } - } - - private static final SortByIndex sortByIndex = new SortByIndex(); - /** Checks if any merges are now necessary and returns a * {@link MergePolicy.MergeSpecification} if so. A merge * is necessary when there are more than {@link @@ -532,29 +505,22 @@ public abstract class LogMergePolicy extends MergePolicy { final SegmentInfo info = infos.info(i); long size = size(info); - // When we require contiguous merge, we still add the - // segment to levels to avoid merging "across" a set - // of segment being merged: - if (!requireContiguousMerge && mergingSegments.contains(info)) { - if (verbose()) { - message("seg " + info.name + " already being merged; skip"); - } - continue; - } - // Floor tiny segments if (size < 1) { size = 1; } + final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i); levels.add(infoLevel); - if (verbose()) { - message("seg " + info.name + " level=" + infoLevel.level + " size=" + size); - } - } - if (!requireContiguousMerge) { - Collections.sort(levels); + if (verbose()) { + final long segBytes = sizeBytes(info); + String extra = mergingSegments.contains(info) ? " [merging]" : ""; + if (size >= maxMergeSize) { + extra += " [skip: too large]"; + } + message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) 
+ extra); + } } final float levelFloor; @@ -614,23 +580,29 @@ public abstract class LogMergePolicy extends MergePolicy { int end = start + mergeFactor; while(end <= 1+upto) { boolean anyTooLarge = false; + boolean anyMerging = false; for(int i=start;i= maxMergeSize || sizeDocs(info) >= maxMergeDocs); + if (mergingSegments.contains(info)) { + anyMerging = true; + break; + } } - if (!anyTooLarge) { + if (anyMerging) { + // skip + } else if (!anyTooLarge) { if (spec == null) spec = new MergeSpecification(); - if (verbose()) { - message(" " + start + " to " + end + ": add this merge"); - } - Collections.sort(levels.subList(start, end), sortByIndex); final SegmentInfos mergeInfos = new SegmentInfos(); for(int i=start;i readers; // used by IndexWriter List readerClones; // used by IndexWriter public final SegmentInfos segments; diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 0943aacbdaa..841349a4a33 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -51,7 +51,6 @@ public final class MultiFields extends Fields { private final Fields[] subs; private final ReaderUtil.Slice[] subSlices; private final Map terms = new ConcurrentHashMap(); - private final Map docValues = new ConcurrentHashMap(); /** Returns a single {@link Fields} instance for this * reader, merging fields/terms/docs/positions on the @@ -193,12 +192,6 @@ public final class MultiFields extends Fields { } } - /** This method may return null if the field does not exist.*/ - public static DocValues getDocValues(IndexReader r, String field) throws IOException { - final Fields fields = getFields(r); - return fields == null? null: fields.docValues(field); - } - /** Returns {@link DocsEnum} for the specified field & * term. This may return null if the term does not * exist. */ @@ -283,41 +276,5 @@ public final class MultiFields extends Fields { return result; } - @Override - public DocValues docValues(String field) throws IOException { - DocValues result = docValues.get(field); - if (result == null) { - // Lazy init: first time this field is requested, we - // create & add to docValues: - final List docValuesIndex = new ArrayList(); - int docsUpto = 0; - Type type = null; - // Gather all sub-readers that share this field - for(int i=0;i docValuesIndex = new ArrayList(); - int docsUpto = 0; - Type type = null; - final int numEnums = enumWithSlices.length; - for (int i = 0; i < numEnums; i++) { - FieldsEnumWithSlice withSlice = enumWithSlices[i]; - Slice slice = withSlice.slice; - final DocValues values = withSlice.fields.docValues(); - final int start = slice.start; - final int length = slice.length; - if (values != null && currentField.equals(withSlice.current)) { - if (docsUpto != start) { - type = values.type(); - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.DummyDocValues(start, type), docsUpto, start - - docsUpto)); - } - docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, - length)); - docsUpto = start + length; - - } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) { - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.DummyDocValues(start, type), docsUpto, start - - docsUpto)); - } - } - return docValuesIndex.isEmpty() ? 
null : docValues.reset(docValuesIndex - .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); - } +// @Override +// public DocValues docValues() throws IOException { +// final List docValuesIndex = new ArrayList(); +// int docsUpto = 0; +// Type type = null; +// final int numEnums = enumWithSlices.length; +// for (int i = 0; i < numEnums; i++) { +// FieldsEnumWithSlice withSlice = enumWithSlices[i]; +// Slice slice = withSlice.slice; +// final DocValues values = withSlice.fields.docValues(); +// final int start = slice.start; +// final int length = slice.length; +// if (values != null && currentField.equals(withSlice.current)) { +// if (docsUpto != start) { +// type = values.type(); +// docValuesIndex.add(new MultiDocValues.DocValuesIndex( +// new MultiDocValues.DummyDocValues(start, type), docsUpto, start +// - docsUpto)); +// } +// docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, +// length)); +// docsUpto = start + length; +// +// } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) { +// docValuesIndex.add(new MultiDocValues.DocValuesIndex( +// new MultiDocValues.DummyDocValues(start, type), docsUpto, start +// - docsUpto)); +// } +// } +// return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex +// .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); +// } } diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java new file mode 100644 index 00000000000..bf10a43f6a0 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -0,0 +1,148 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.index.codecs.PerDocValues; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; +import org.apache.lucene.util.ReaderUtil; + +/** + * + * nocommit - javadoc + * @experimental + */ +public class MultiPerDocValues extends PerDocValues { + private final PerDocValues[] subs; + private final ReaderUtil.Slice[] subSlices; + private final Map docValues = new ConcurrentHashMap(); + private final TreeSet fields; + + public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) { + this.subs = subs; + this.subSlices = subSlices; + fields = new TreeSet(); + for (PerDocValues sub : subs) { + fields.addAll(sub.fields()); + } + } + + public static PerDocValues getPerDocs(IndexReader r) throws IOException { + final IndexReader[] subs = r.getSequentialSubReaders(); + if (subs == null) { + // already an atomic reader + return r.perDocValues(); + } else if (subs.length == 0) { + // no fields + return null; + } else if (subs.length == 1) { + return getPerDocs(subs[0]); + } + PerDocValues perDocValues = r.retrievePerDoc(); + if (perDocValues == null) { + + final List producer = new ArrayList(); + final List slices = new ArrayList(); + + new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + final PerDocValues f = r.perDocValues(); + if (f != null) { + producer.add(f); + slices + .add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1)); + } + } + }.run(); + + if (producer.size() == 0) { + return null; + } else if (producer.size() == 1) { + perDocValues = producer.get(0); + } else { + perDocValues = new MultiPerDocValues( + producer.toArray(PerDocValues.EMPTY_ARRAY), + slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)); + } + r.storePerDoc(perDocValues); + } + return perDocValues; + } + + public DocValues docValues(String field) throws IOException { + DocValues result = docValues.get(field); + if (result == null) { + // Lazy init: first time this field is requested, we + // create & add to docValues: + final List docValuesIndex = new ArrayList(); + int docsUpto = 0; + Type type = null; + // Gather all sub-readers that share this field + for (int i = 0; i < subs.length; i++) { + DocValues values = subs[i].docValues(field); + final int start = subSlices[i].start; + final int length = subSlices[i].length; + if (values != null) { + if (docsUpto != start) { + type = values.type(); + docValuesIndex.add(new MultiDocValues.DocValuesIndex( + new MultiDocValues.DummyDocValues(start, type), docsUpto, start + - docsUpto)); + } + docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, + length)); + docsUpto = start + length; + + } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) { + docValuesIndex.add(new MultiDocValues.DocValuesIndex( + new MultiDocValues.DummyDocValues(start, type), docsUpto, start + - docsUpto)); + } + } + if (docValuesIndex.isEmpty()) { + return null; + } + result = new MultiDocValues( + docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY)); + docValues.put(field, result); + } + return result; + } + + @Override + public void close() throws IOException { + PerDocValues[] perDocValues = this.subs; + for 
(PerDocValues values : perDocValues) { + values.close(); + } + } + + @Override + public Collection fields() { + return fields; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/MultiReader.java b/lucene/src/java/org/apache/lucene/index/MultiReader.java index c2682e40231..7a943fadcd0 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/MultiReader.java @@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; @@ -403,4 +404,9 @@ public class MultiReader extends IndexReader implements Cloneable { sub.removeReaderFinishedListener(listener); } } + + @Override + public PerDocValues perDocValues() throws IOException { + throw new UnsupportedOperationException("please use MultiPerDoc#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); + } } diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/NormsWriter.java index e0cff83de02..5064a47f3bc 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/NormsWriter.java @@ -19,11 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Collection; -import java.util.Iterator; -import java.util.HashMap; import java.util.Map; -import java.util.List; -import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; @@ -36,10 +32,6 @@ import org.apache.lucene.store.IndexOutput; final class NormsWriter extends InvertedDocEndConsumer { - @Override - public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new NormsWriterPerThread(docInverterPerThread, this); - } @Override public void abort() {} @@ -50,40 +42,11 @@ final class NormsWriter extends InvertedDocEndConsumer { /** Produce _X.nrm if any document had a field with norms * not disabled */ @Override - public void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { - - final Map> byField = new HashMap>(); - + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { if (!state.fieldInfos.hasNorms()) { return; } - // Typically, each thread will have encountered the same - // field. 
So first we collate by field, ie, all - // per-thread field instances that correspond to the - // same FieldInfo - for (final Map.Entry> entry : threadsAndFields.entrySet()) { - final Collection fields = entry.getValue(); - final Iterator fieldsIt = fields.iterator(); - - while (fieldsIt.hasNext()) { - final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next(); - - if (perField.upto > 0) { - // It has some norms - List l = byField.get(perField.fieldInfo); - if (l == null) { - l = new ArrayList(); - byField.put(perField.fieldInfo, l); - } - l.add(perField); - } else - // Remove this field since we haven't seen it - // since the previous flush - fieldsIt.remove(); - } - } - final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION); IndexOutput normsOut = state.directory.createOutput(normsFileName); @@ -93,60 +56,25 @@ final class NormsWriter extends InvertedDocEndConsumer { int normCount = 0; for (FieldInfo fi : state.fieldInfos) { - final List toMerge = byField.get(fi); + final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi); int upto = 0; - if (toMerge != null) { - - final int numFields = toMerge.size(); - + if (toWrite != null && toWrite.upto > 0) { normCount++; - final NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; - int[] uptos = new int[numFields]; - - for(int j=0;j 0) { - - assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length); - - int minLoc = 0; - int minDocID = fields[0].docIDs[uptos[0]]; - - for(int j=1;j { - final NormsWriterPerThread perThread; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final Similarity similarity; // Holds all docID/norm pairs we've seen @@ -46,10 +45,9 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement upto = 0; } - public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) { - this.perThread = perThread; + public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; - docState = perThread.docState; + docState = docInverterPerField.docState; fieldState = docInverterPerField.fieldState; similarity = docState.similarityProvider.get(fieldInfo.name); } diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index 57476e2cd94..4b5d78d5682 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -21,9 +21,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.Pair; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.MapBackedSet; @@ -183,21 +182,15 @@ public class ParallelReader extends IndexReader { } } - @Override - public DocValues docValues() throws IOException { - assert currentReader != null; - return MultiFields.getDocValues(currentReader, currentField); - } } // Single instance of this, per ParallelReader instance private class ParallelFields 
extends Fields { - final HashMap> fields = new HashMap>(); + final HashMap fields = new HashMap(); public void addField(String field, IndexReader r) throws IOException { Fields multiFields = MultiFields.getFields(r); - fields.put(field, new Pair( multiFields.terms(field), - multiFields.docValues(field))); + fields.put(field, multiFields.terms(field)); } @Override @@ -206,12 +199,7 @@ public class ParallelReader extends IndexReader { } @Override public Terms terms(String field) throws IOException { - return fields.get(field).cur; - } - - @Override - public DocValues docValues(String field) throws IOException { - return fields.get(field).cud; + return fields.get(field); } } @@ -578,6 +566,12 @@ public class ParallelReader extends IndexReader { reader.removeReaderFinishedListener(listener); } } + + @Override + public PerDocValues perDocValues() throws IOException { + // TODO Auto-generated method stub + return null; + } } diff --git a/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java new file mode 100644 index 00000000000..652f1b6d5a5 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java @@ -0,0 +1,77 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.PrintStream; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.store.Directory; + +/** + * nocommit - javadoc + * @lucene.experimental + */ +public class PerDocWriteState { + public final PrintStream infoStream; + public final Directory directory; + public final String segmentName; + public final FieldInfos fieldInfos; + public final AtomicLong bytesUsed; + public final SegmentCodecs segmentCodecs; + public final int codecId; + + /** Expert: The fraction of terms in the "dictionary" which should be stored + * in RAM. Smaller values use more memory, but make searching slightly + * faster, while larger values use less memory and make searching slightly + * slower. 
Searching is typically not dominated by dictionary lookup, so + * tweaking this is rarely useful.*/ + public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC + + public PerDocWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed, int codecId) { + this.infoStream = infoStream; + this.directory = directory; + this.segmentName = segmentName; + this.fieldInfos = fieldInfos; + this.segmentCodecs = fieldInfos.buildSegmentCodecs(false); + this.codecId = codecId; + this.bytesUsed = bytesUsed; + } + + public PerDocWriteState(SegmentWriteState state) { + infoStream = state.infoStream; + directory = state.directory; + segmentCodecs = state.segmentCodecs; + segmentName = state.segmentName; + fieldInfos = state.fieldInfos; + codecId = state.codecId; + bytesUsed = new AtomicLong(0); + } + + public PerDocWriteState(PerDocWriteState state, int codecId) { + this.infoStream = state.infoStream; + this.directory = state.directory; + this.segmentName = state.segmentName; + this.fieldInfos = state.fieldInfos; + this.segmentCodecs = state.segmentCodecs; + this.codecId = codecId; + this.bytesUsed = state.bytesUsed; + } + + + public String codecIdAsString() { + return "" + codecId; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index d441aa4e5c3..d1acaf46a5e 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -28,6 +29,8 @@ import java.util.TreeSet; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.index.values.DocValues; @@ -74,12 +77,6 @@ final class PerFieldCodecWrapper extends Codec { return fields.addField(field); } - @Override - public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - final FieldsConsumer fields = consumers.get(field.getCodecId()); - return fields.addValuesField(field); - } - @Override public void close() throws IOException { Iterator it = consumers.iterator(); @@ -113,7 +110,7 @@ final class PerFieldCodecWrapper extends Codec { boolean success = false; try { for (FieldInfo fi : fieldInfos) { - if (fi.isIndexed || fi.hasDocValues()) { // TODO this does not work for non-indexed fields + if (fi.isIndexed) { fields.add(fi.name); assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; Codec codec = segmentCodecs.codecs[fi.getCodecId()]; @@ -171,11 +168,6 @@ final class PerFieldCodecWrapper extends Codec { return TermsEnum.EMPTY; } } - - @Override - public DocValues docValues() throws IOException { - return codecs.get(current).docValues(current); - } } @Override @@ -189,12 +181,6 @@ final class PerFieldCodecWrapper extends Codec { return fields == null ? 
null : fields.terms(field); } - @Override - public DocValues docValues(String field) throws IOException { - FieldsProducer fieldsProducer = codecs.get(field); - return fieldsProducer == null? null: fieldsProducer.docValues(field); - } - @Override public void close() throws IOException { Iterator it = codecs.values().iterator(); @@ -244,4 +230,133 @@ final class PerFieldCodecWrapper extends Codec { codec.getExtensions(extensions); } } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new PerDocConsumers(state); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new PerDocProducers(state.dir, state.fieldInfos, state.segmentInfo, + state.readBufferSize, state.termsIndexDivisor); + } + + private final class PerDocProducers extends PerDocValues { + private final Set fields = new TreeSet(); + private final Map codecs = new HashMap(); + + public PerDocProducers(Directory dir, FieldInfos fieldInfos, SegmentInfo si, + int readBufferSize, int indexDivisor) throws IOException { + final Map producers = new HashMap(); + boolean success = false; + try { + for (FieldInfo fi : fieldInfos) { + if (fi.hasDocValues()) { + fields.add(fi.name); + assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; + Codec codec = segmentCodecs.codecs[fi.getCodecId()]; + if (!producers.containsKey(codec)) { + producers.put(codec, codec.docsProducer(new SegmentReadState(dir, + si, fieldInfos, readBufferSize, indexDivisor, fi.getCodecId()))); + } + codecs.put(fi.name, producers.get(codec)); + } + } + success = true; + } finally { + if (!success) { + // If we hit exception (eg, IOE because writer was + // committing, or, for any other reason) we must + // go back and close all FieldsProducers we opened: + for(PerDocValues producer : producers.values()) { + try { + producer.close(); + } catch (Throwable t) { + // Suppress all exceptions here so we continue + // to throw the original one + } + } + } + } + } + @Override + public Collection fields() { + return fields; + } + @Override + public DocValues docValues(String field) throws IOException { + final PerDocValues perDocProducer = codecs.get(field); + if (perDocProducer == null) { + return null; + } + return perDocProducer.docValues(field); + } + + @Override + public void close() throws IOException { + final Iterator it = codecs.values().iterator(); + IOException err = null; + while (it.hasNext()) { + try { + it.next().close(); + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } + } + if (err != null) { + throw err; + } + } + } + + private final class PerDocConsumers extends PerDocConsumer { + private final ArrayList consumers = new ArrayList(); + + public PerDocConsumers(PerDocWriteState state) throws IOException { + assert segmentCodecs == state.segmentCodecs; + final Codec[] codecs = segmentCodecs.codecs; + for (int i = 0; i < codecs.length; i++) { + consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i))); + } + } + + @Override + public void close() throws IOException { + Iterator it = consumers.iterator(); + IOException err = null; + while (it.hasNext()) { + try { + PerDocConsumer next = it.next(); + if (next != null) { + next.close(); + } + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } + } + if (err != null) { + throw err; + } + } + + @Override + public DocValuesConsumer 
addValuesField(FieldInfo field) throws IOException { + assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; + final PerDocConsumer perDoc = consumers.get(field.getCodecId()); + if (perDoc == null) { + return null; + } + return perDoc.addValuesField(field); + } + + } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 679367e4bf4..f7999da4219 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -39,14 +39,14 @@ import org.apache.lucene.util.Constants; /** * Information about a segment such as it's name, directory, and files related * to the segment. - * + * * @lucene.experimental */ public final class SegmentInfo { static final int NO = -1; // e.g. no norms; no deletes; static final int YES = 1; // e.g. have norms; have deletes; - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. + static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. public String name; // unique name in dir public int docCount; // number of docs in seg @@ -58,7 +58,7 @@ public final class SegmentInfo { * - YES or higher if there are deletes at generation N */ private long delGen; - + /* * Current generation of each field's norm file. If this array is null, * means no separate norms. If this array is not null, its values mean: @@ -67,7 +67,7 @@ public final class SegmentInfo { */ private Map normGen; - private boolean isCompoundFile; + private boolean isCompoundFile; private volatile List files; // cached list of files that this segment uses // in the Directory @@ -75,10 +75,13 @@ public final class SegmentInfo { private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private int docStoreOffset; // if this segment shares stored fields & vectors, this // offset is where in that file this segment's docs begin + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private String docStoreSegment; // name used to derive fields/vectors file we share with // other segments + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) private int delCount; // How many deleted docs in this segment @@ -93,9 +96,9 @@ public final class SegmentInfo { private Map diagnostics; - // Tracks the Lucene version this segment was created with, since 3.1. Null + // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. - // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and // specific versions afterwards ("3.0", "3.1" etc.). // see Constants.LUCENE_MAIN_VERSION. 
private String version; @@ -103,7 +106,7 @@ public final class SegmentInfo { // NOTE: only used in-RAM by IW to track buffered deletes; // this is never written to/read from the Directory private long bufferedDeletesGen; - + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) { this.name = name; @@ -184,11 +187,13 @@ public final class SegmentInfo { docStoreSegment = name; docStoreIsCompoundFile = false; } + if (format > DefaultSegmentInfosWriter.FORMAT_4_0) { // pre-4.0 indexes write a byte if there is a single norms file byte b = input.readByte(); assert 1 == b; } + int numNormGen = input.readInt(); if (numNormGen == NO) { normGen = null; @@ -209,7 +214,7 @@ public final class SegmentInfo { assert delCount <= docCount; hasProx = input.readByte() == YES; - + // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name); if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) { segmentCodecs = new SegmentCodecs(codecs, input); @@ -219,7 +224,7 @@ public final class SegmentInfo { segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")}); } diagnostics = input.readStringStringMap(); - + if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) { hasVectors = input.readByte() == 1; } else { @@ -368,7 +373,7 @@ public final class SegmentInfo { // against this segment return null; } else { - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); + return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -434,7 +439,7 @@ public final class SegmentInfo { if (hasSeparateNorms(number)) { return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number)); } else { - // single file for all norms + // single file for all norms return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN); } } @@ -467,39 +472,74 @@ public final class SegmentInfo { assert delCount <= docCount; } + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated public int getDocStoreOffset() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) return docStoreOffset; } - + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated public boolean getDocStoreIsCompoundFile() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) return docStoreIsCompoundFile; } - - void setDocStoreIsCompoundFile(boolean v) { - docStoreIsCompoundFile = v; - clearFilesCache(); - } - - public String getDocStoreSegment() { - return docStoreSegment; - } - - public void setDocStoreSegment(String segment) { - docStoreSegment = segment; - } - - void setDocStoreOffset(int offset) { - docStoreOffset = offset; + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + this.docStoreIsCompoundFile = docStoreIsCompoundFile; clearFilesCache(); } - void setDocStore(int offset, String segment, boolean isCompoundFile) { + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + void setDocStore(int offset, String segment, boolean isCompoundFile) { + // TODO: LUCENE-2555: remove once we don't need 
to support shared doc stores (pre 4.0) docStoreOffset = offset; docStoreSegment = segment; docStoreIsCompoundFile = isCompoundFile; clearFilesCache(); } - + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + public String getDocStoreSegment() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + return docStoreSegment; + } + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + void setDocStoreOffset(int offset) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + docStoreOffset = offset; + clearFilesCache(); + } + + /** + * @deprecated shared doc stores are not supported in 4.0 + */ + @Deprecated + public void setDocStoreSegment(String docStoreSegment) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + this.docStoreSegment = docStoreSegment; + } + /** Save this segment's info. */ public void write(IndexOutput output) throws IOException { @@ -509,12 +549,14 @@ public final class SegmentInfo { output.writeString(name); output.writeInt(docCount); output.writeLong(delGen); + output.writeInt(docStoreOffset); if (docStoreOffset != -1) { output.writeString(docStoreSegment); output.writeByte((byte) (docStoreIsCompoundFile ? 1:0)); } + if (normGen == null) { output.writeInt(NO); } else { @@ -524,7 +566,7 @@ public final class SegmentInfo { output.writeLong(entry.getValue()); } } - + output.writeByte((byte) (isCompoundFile ? YES : NO)); output.writeInt(delCount); output.writeByte((byte) (hasProx ? 1:0)); @@ -572,9 +614,9 @@ public final class SegmentInfo { // Already cached: return files; } - + Set fileSet = new HashSet(); - + boolean useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { @@ -608,7 +650,7 @@ public final class SegmentInfo { fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - } + } } String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); @@ -646,7 +688,7 @@ public final class SegmentInfo { } /** Used for debugging. Format may suddenly change. - * + * *

Current format looks like * _a(3.1):c45/4->_1, which means the segment's * name is _a; it was created with Lucene 3.1 (or @@ -661,7 +703,6 @@ public final class SegmentInfo { StringBuilder s = new StringBuilder(); s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':'); - char cfs = getUseCompoundFile() ? 'c' : 'C'; s.append(cfs); @@ -677,7 +718,7 @@ public final class SegmentInfo { if (delCount != 0) { s.append('/').append(delCount); } - + if (docStoreOffset != -1) { s.append("->").append(docStoreSegment); if (docStoreIsCompoundFile) { @@ -717,13 +758,13 @@ public final class SegmentInfo { * NOTE: this method is used for internal purposes only - you should * not modify the version of a SegmentInfo, or it may result in unexpected * exceptions thrown when you attempt to open the index. - * + * * @lucene.internal */ public void setVersion(String version) { this.version = version; } - + /** Returns the version of the code which wrote the segment. */ public String getVersion() { return version; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 2303207149c..46c050e3588 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader.FieldOption; @@ -31,6 +30,8 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.MergeState; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -40,24 +41,24 @@ import org.apache.lucene.util.ReaderUtil; /** * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. After adding the appropriate readers, call the merge method to combine the + * into a single Segment. After adding the appropriate readers, call the merge method to combine the * segments. 
- * + * * @see #merge * @see #add */ final class SegmentMerger { - + /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; - + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private Directory directory; private String segment; private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; private List readers = new ArrayList(); private final FieldInfos fieldInfos; - + private int mergedDocs; private final MergeState.CheckAbort checkAbort; @@ -65,12 +66,12 @@ final class SegmentMerger { /** Maximum number of contiguous documents to bulk-copy when merging stored fields */ private final static int MAX_RAW_MERGE_DOCS = 4192; - + private Codec codec; private SegmentWriteState segmentWriteState; private PayloadProcessorProvider payloadProcessorProvider; - + SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) { this.payloadProcessorProvider = payloadProcessorProvider; directory = dir; @@ -133,10 +134,10 @@ final class SegmentMerger { for (String file : files) { cfsWriter.addFile(file); } - + // Perform the merge cfsWriter.close(); - + return files; } @@ -194,13 +195,12 @@ final class SegmentMerger { } /** - * + * * @return The number of documents in all of the readers * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ private int mergeFields() throws CorruptIndexException, IOException { - for (IndexReader reader : readers) { if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; @@ -263,8 +263,8 @@ final class SegmentMerger { // details. throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption"); - segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null, new AtomicLong(0)); - + segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null); + return docCount; } @@ -282,7 +282,7 @@ final class SegmentMerger { ++j; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { @@ -294,7 +294,7 @@ final class SegmentMerger { break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; @@ -348,7 +348,7 @@ final class SegmentMerger { * @throws IOException */ private final void mergeVectors() throws IOException { - TermVectorsWriter termVectorsWriter = + TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { @@ -368,7 +368,7 @@ final class SegmentMerger { copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); } else { copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); - + } } } finally { @@ -401,7 +401,7 @@ final class SegmentMerger { ++docNum; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk 
byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { @@ -413,7 +413,7 @@ final class SegmentMerger { break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.work(300 * numDocs); @@ -424,7 +424,7 @@ final class SegmentMerger { // skip deleted docs continue; } - + // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); @@ -433,7 +433,7 @@ final class SegmentMerger { } } } - + private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter, final TermVectorsReader matchingVectorsReader, final IndexReader reader) @@ -469,13 +469,20 @@ final class SegmentMerger { // Let CodecProvider decide which codec will be used to write // the new segment: - + int docBase = 0; final List fields = new ArrayList(); + final List slices = new ArrayList(); final List bits = new ArrayList(); final List bitsStarts = new ArrayList(); + + // TODO: move this into its own method - this merges currently only docvalues + final List perDocProducers = new ArrayList(); + final List perDocSlices = new ArrayList(); + final List perDocBits = new ArrayList(); + final List perDocBitsStarts = new ArrayList(); for(IndexReader r : readers) { final Fields f = r.fields(); @@ -486,10 +493,18 @@ final class SegmentMerger { bits.add(r.getDeletedDocs()); bitsStarts.add(docBase); } + final PerDocValues producer = r.perDocValues(); + if (producer != null) { + perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size())); + perDocProducers.add(producer); + perDocBits.add(r.getDeletedDocs()); + perDocBitsStarts.add(docBase); + } docBase += maxDoc; } bitsStarts.add(docBase); + perDocBitsStarts.add(docBase); // we may gather more readers than mergeState.readerCount mergeState = new MergeState(); @@ -497,7 +512,7 @@ final class SegmentMerger { mergeState.readerCount = readers.size(); mergeState.fieldInfos = fieldInfos; mergeState.mergedDocCount = mergedDocs; - + // Remap docIDs mergeState.delCounts = new int[mergeState.readerCount]; mergeState.docMaps = new int[mergeState.readerCount][]; @@ -535,7 +550,7 @@ final class SegmentMerger { } assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount; } - + if (payloadProcessorProvider != null) { mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory()); } @@ -548,7 +563,7 @@ final class SegmentMerger { // apart when we step through the docs enums in // MultiDocsEnum. 
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts); - + try { consumer.merge(mergeState, new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), @@ -556,6 +571,21 @@ final class SegmentMerger { } finally { consumer.close(); } + if (!perDocSlices.isEmpty()) { + mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts); + final PerDocConsumer docsConsumer = codec + .docsConsumer(new PerDocWriteState(segmentWriteState)); + try { + docsConsumer.merge( + mergeState, + new MultiPerDocValues(perDocProducers + .toArray(PerDocValues.EMPTY_ARRAY), perDocSlices + .toArray(ReaderUtil.Slice.EMPTY_ARRAY))); + } finally { + docsConsumer.close(); + } + } + } private MergeState mergeState; @@ -567,7 +597,7 @@ final class SegmentMerger { int[] getDelCounts() { return mergeState.delCounts; } - + public boolean getAnyNonBulkMerges() { assert matchedCount <= readers.size(); return matchedCount != readers.size(); @@ -578,7 +608,7 @@ final class SegmentMerger { try { for (FieldInfo fi : fieldInfos) { if (fi.isIndexed && !fi.omitNorms) { - if (output == null) { + if (output == null) { output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } @@ -609,7 +639,7 @@ final class SegmentMerger { } } } finally { - if (output != null) { + if (output != null) { output.close(); } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 90607888af6..0aa94487adf 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -28,20 +28,16 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; -import org.apache.lucene.index.values.Bytes; -import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.Floats; -import org.apache.lucene.index.values.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CloseableThreadLocal; @@ -61,6 +57,9 @@ public class SegmentReader extends IndexReader implements Cloneable { AtomicInteger deletedDocsRef = null; private boolean deletedDocsDirty = false; private boolean normsDirty = false; + + // TODO: we should move this tracking into SegmentInfo; + // this way SegmentInfo.toString shows pending deletes private int pendingDeleteCount; private boolean rollbackHasChanges = false; @@ -91,6 +90,7 @@ public class SegmentReader extends IndexReader implements Cloneable { final FieldInfos fieldInfos; final FieldsProducer fields; + final PerDocValues perDocProducer; final Directory dir; final Directory cfsDir; @@ -130,8 +130,10 @@ public class SegmentReader extends IndexReader implements Cloneable { this.termsIndexDivisor = termsIndexDivisor; // Ask codec for its Fields - fields = segmentCodecs.codec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor)); + final 
SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor); + fields = segmentCodecs.codec().fieldsProducer(segmentReadState); assert fields != null; + perDocProducer = segmentCodecs.codec().docsProducer(segmentReadState); success = true; } finally { if (!success) { @@ -169,6 +171,10 @@ public class SegmentReader extends IndexReader implements Cloneable { if (fields != null) { fields.close(); } + + if (perDocProducer != null) { + perDocProducer.close(); + } if (termVectorsReaderOrig != null) { termVectorsReaderOrig.close(); @@ -808,8 +814,9 @@ public class SegmentReader extends IndexReader implements Cloneable { oldRef.decrementAndGet(); } deletedDocsDirty = true; - if (!deletedDocs.getAndSet(docNum)) + if (!deletedDocs.getAndSet(docNum)) { pendingDeleteCount++; + } } @Override @@ -1211,6 +1218,11 @@ public class SegmentReader extends IndexReader implements Cloneable { @Override public DocValues docValues(String field) throws IOException { - return core.fields.docValues(field); + return core.perDocProducer.docValues(field); + } + + @Override + public PerDocValues perDocValues() throws IOException { + return core.perDocProducer; } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 1b273f56cad..c29add9bd93 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -33,7 +33,6 @@ public class SegmentWriteState { public final FieldInfos fieldInfos; public final int numDocs; public boolean hasVectors; - public final AtomicLong bytesUsed; // Deletes to apply while we are flushing the segment. A // Term is enrolled in here if it was deleted at one @@ -56,7 +55,7 @@ public class SegmentWriteState { public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, - int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes, AtomicLong bytesUsed) { + int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) { this.infoStream = infoStream; this.segDeletes = segDeletes; this.directory = directory; @@ -66,7 +65,6 @@ public class SegmentWriteState { this.termIndexInterval = termIndexInterval; this.segmentCodecs = segmentCodecs; codecId = -1; - this.bytesUsed = bytesUsed; } /** @@ -82,7 +80,6 @@ public class SegmentWriteState { segmentCodecs = state.segmentCodecs; this.codecId = codecId; segDeletes = state.segDeletes; - bytesUsed = state.bytesUsed; } public String codecIdAsString() { diff --git a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java index 78c834f8008..82e976098ff 100644 --- a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java @@ -26,6 +26,7 @@ import org.apache.lucene.util.ReaderUtil; // javadoc import org.apache.lucene.index.DirectoryReader; // javadoc import org.apache.lucene.index.MultiReader; // javadoc +import org.apache.lucene.index.codecs.PerDocValues; /** * This class forces a composite reader (eg a {@link @@ -64,6 +65,11 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader { return MultiFields.getFields(in); } + @Override + public 
PerDocValues perDocValues() throws IOException { + return MultiPerDocValues.getPerDocs(in); + } + @Override public Bits getDeletedDocs() { return MultiFields.getDeletedDocs(in); diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java index 9f04dcb9786..c3aa5c86b60 100644 --- a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java @@ -18,7 +18,8 @@ package org.apache.lucene.index; */ import java.io.IOException; -import org.apache.lucene.store.RAMOutputStream; + +import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -26,22 +27,38 @@ import org.apache.lucene.util.RamUsageEstimator; final class StoredFieldsWriter { FieldsWriter fieldsWriter; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; int lastDocID; - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; - public StoredFieldsWriter(DocumentsWriter docWriter) { + final DocumentsWriterPerThread.DocState docState; + + public StoredFieldsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + this.docState = docWriter.docState; } - public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException { - return new StoredFieldsWriterPerThread(docState, this); + private int numStoredFields; + private Fieldable[] storedFields; + private int[] fieldNumbers; + + public void reset() { + numStoredFields = 0; + storedFields = new Fieldable[1]; + fieldNumbers = new int[1]; } - synchronized public void flush(SegmentWriteState state) throws IOException { - if (state.numDocs > lastDocID) { + public void startDocument() { + reset(); + } + + public void flush(SegmentWriteState state) throws IOException { + + if (state.numDocs > 0) { + // It's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the FieldsWriter: initFieldsWriter(); fill(state.numDocs); } @@ -67,23 +84,9 @@ final class StoredFieldsWriter { int allocCount; - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } + void abort() { + reset(); - synchronized void abort() { if (fieldsWriter != null) { fieldsWriter.abort(); fieldsWriter = null; @@ -101,53 +104,40 @@ final class StoredFieldsWriter { } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + void finishDocument() throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); + initFieldsWriter(); + fill(docState.docID); - fill(perDoc.docID); + if (fieldsWriter != null && numStoredFields > 0) { + fieldsWriter.startDocument(numStoredFields); + for (int i = 0; i < numStoredFields; i++) { + fieldsWriter.writeField(fieldNumbers[i], storedFields[i]); + } + lastDocID++; + } - // Append stored fields to the real FieldsWriter: - fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt); - lastDocID++; - perDoc.reset(); - free(perDoc); + reset(); assert 
docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end"); } - synchronized void free(PerDoc perDoc) { - assert freeCount < docFreeList.length; - assert 0 == perDoc.numStoredFields; - assert 0 == perDoc.fdt.length(); - assert 0 == perDoc.fdt.getFilePointer(); - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer(); - RAMOutputStream fdt = new RAMOutputStream(buffer); - int numStoredFields; - - void reset() { - fdt.reset(); - buffer.recycle(); - numStoredFields = 0; + public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + if (numStoredFields == storedFields.length) { + int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(storedFields, 0, newArray, 0, numStoredFields); + storedFields = newArray; } - @Override - void abort() { - reset(); - free(this); + if (numStoredFields == fieldNumbers.length) { + fieldNumbers = ArrayUtil.grow(fieldNumbers); } - @Override - public long sizeInBytes() { - return buffer.getSizeInBytes(); - } + storedFields[numStoredFields] = field; + fieldNumbers[numStoredFields] = fieldInfo.number; + numStoredFields++; - @Override - public void finish() throws IOException { - finishDocument(this); - } + assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); } } diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java deleted file mode 100644 index 85c6b57583b..00000000000 --- a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.document.Fieldable; - -final class StoredFieldsWriterPerThread { - - final FieldsWriter localFieldsWriter; - final StoredFieldsWriter storedFieldsWriter; - final DocumentsWriter.DocState docState; - - StoredFieldsWriter.PerDoc doc; - - public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException { - this.storedFieldsWriter = storedFieldsWriter; - this.docState = docState; - localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null); - } - - public void startDocument() { - if (doc != null) { - // Only happens if previous document hit non-aborting - // exception while writing stored fields into - // localFieldsWriter: - doc.reset(); - doc.docID = docState.docID; - } - } - - public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { - if (doc == null) { - doc = storedFieldsWriter.getPerDoc(); - doc.docID = docState.docID; - localFieldsWriter.setFieldsStream(doc.fdt); - assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; - assert 0 == doc.fdt.length(); - assert 0 == doc.fdt.getFilePointer(); - } - - localFieldsWriter.writeField(fieldInfo, field); - assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); - doc.numStoredFields++; - } - - public DocumentsWriter.DocWriter finishDocument() { - // If there were any stored fields in this doc, doc will - // be non-null; else it's null. - try { - return doc; - } finally { - doc = null; - } - } - - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } -} diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java index a5d631efc53..da43f3ad311 100644 --- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java @@ -17,49 +17,48 @@ package org.apache.lucene.index; * limitations under the License. 
*/ +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; -import java.io.IOException; -import java.util.Collection; - -import java.util.Map; - final class TermVectorsTermsWriter extends TermsHashConsumer { - final DocumentsWriter docWriter; - PerDoc[] docFreeList = new PerDoc[1]; + final DocumentsWriterPerThread docWriter; int freeCount; IndexOutput tvx; IndexOutput tvd; IndexOutput tvf; int lastDocID; + + final DocumentsWriterPerThread.DocState docState; + final BytesRef flushTerm = new BytesRef(); + + // Used by perField when serializing the term vectors + final ByteSliceReader vectorSliceReader = new ByteSliceReader(); boolean hasVectors; - public TermVectorsTermsWriter(DocumentsWriter docWriter) { + public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + docState = docWriter.docState; } @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) { - return new TermVectorsTermsWriterPerThread(termsHashPerThread, this); - } - - @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { if (tvx != null) { // At least one doc in this run had term vectors enabled fill(state.numDocs); + assert state.segmentName != null; + String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.close(); tvf.close(); tvd.close(); tvx = tvd = tvf = null; - assert state.segmentName != null; - String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); - if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) { + if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) { throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName)); } @@ -68,33 +67,10 @@ final class TermVectorsTermsWriter extends TermsHashConsumer { hasVectors = false; } - for (Map.Entry> entry : threadsAndFields.entrySet()) { - for (final TermsHashConsumerPerField field : entry.getValue() ) { - TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; - perField.termsHashPerField.reset(); - perField.shrinkHash(); - } - - TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); - } - } - - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; + for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) { + TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; + perField.termsHashPerField.reset(); + perField.shrinkHash(); } } 
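// A minimal sketch (illustration only; the class and method names below are
// hypothetical), assuming the .tvx layout written by this consumer: a 4-byte
// format header followed by two 8-byte file pointers (into .tvd and .tvf) per
// document. This is exactly the file length asserted in flush() above.
final class TvxLengthSketch {
  static long expectedTvxLength(int numDocs) {
    final long headerBytes = 4;       // tvx.writeInt(TermVectorsReader.FORMAT_CURRENT)
    final long bytesPerDoc = 2L * 8;  // one long pointer into .tvd, one into .tvf
    return headerBytes + bytesPerDoc * numDocs;  // 4 + ((long) numDocs) * 16
  }
}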
@@ -112,18 +88,17 @@ final class TermVectorsTermsWriter extends TermsHashConsumer { } } - synchronized void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter() throws IOException { if (tvx == null) { // If we hit an exception while init'ing the term // vector output files, we must abort this segment // because those files will be in an unknown // state: - hasVectors = true; tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION)); tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - + tvx.writeInt(TermVectorsReader.FORMAT_CURRENT); tvd.writeInt(TermVectorsReader.FORMAT_CURRENT); tvf.writeInt(TermVectorsReader.FORMAT_CURRENT); @@ -132,39 +107,44 @@ final class TermVectorsTermsWriter extends TermsHashConsumer { } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + @Override + void finishDocument(TermsHash termsHash) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); - initTermVectorsWriter(); - - fill(perDoc.docID); - - // Append term vectors to the real outputs: - tvx.writeLong(tvd.getFilePointer()); - tvx.writeLong(tvf.getFilePointer()); - tvd.writeVInt(perDoc.numVectorFields); - if (perDoc.numVectorFields > 0) { - for(int i=0;i 0) { + for(int i=0;i= 0; - if (!doVectors || numPostings == 0) - return; - if (numPostings > maxNumPostings) maxNumPostings = numPostings; - final IndexOutput tvf = perThread.doc.perDocTvf; - // This is called once, after inverting all occurrences // of a given field in the doc. At this point we flush // our hash into the DocWriter. assert fieldInfo.storeTermVector; - assert perThread.vectorFieldsInOrder(fieldInfo); + assert termsWriter.vectorFieldsInOrder(fieldInfo); - perThread.doc.addField(termsHashPerField.fieldInfo.number); TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray; + final IndexOutput tvf = termsWriter.tvf; // TODO: we may want to make this sort in same order // as Codec's terms dict? 
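// A minimal sketch (illustration only; this helper is hypothetical, not an
// existing Lucene API), assuming the per-document .tvx record written in
// finishDocument() above: after the 4-byte header, document docID owns 16 bytes
// holding its .tvd and .tvf start pointers.
final class TermVectorPointersSketch {
  static long[] pointers(org.apache.lucene.store.IndexInput tvx, int docID) throws java.io.IOException {
    tvx.seek(4 + 16L * docID);               // skip the format header, then 16 bytes per doc
    final long tvdPointer = tvx.readLong();  // where this doc's entry begins in .tvd
    final long tvfPointer = tvx.readLong();  // where this doc's field data begins in .tvf
    return new long[] { tvdPointer, tvfPointer };
  }
}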
@@ -140,21 +128,21 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { byte bits = 0x0; if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; - if (doVectorOffsets) + if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; tvf.writeByte(bits); int lastLen = 0; byte[] lastBytes = null; int lastStart = 0; - - final ByteSliceReader reader = perThread.vectorSliceReader; - final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool; + + final ByteSliceReader reader = termsWriter.vectorSliceReader; + final ByteBlockPool termBytePool = termsHashPerField.termBytePool; for(int j=0;j> threadsAndFields, final SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> nextThreadsAndFields; - - if (nextTermsHash != null) - nextThreadsAndFields = new HashMap>(); - else - nextThreadsAndFields = null; - - for (final Map.Entry> entry : threadsAndFields.entrySet()) { - - TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); - - Collection fields = entry.getValue(); - - Iterator fieldsIt = fields.iterator(); - Collection childFields = new HashSet(); - Collection nextChildFields; - - if (nextTermsHash != null) - nextChildFields = new HashSet(); - else - nextChildFields = null; - - while(fieldsIt.hasNext()) { - TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); - childFields.add(perField.consumer); - if (nextTermsHash != null) - nextChildFields.add(perField.nextPerField); + reset(); + try { + consumer.abort(); + } finally { + if (nextTermsHash != null) { + nextTermsHash.abort(); } - - childThreadsAndFields.put(perThread.consumer, childFields); - if (nextTermsHash != null) - nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); } - - consumer.flush(childThreadsAndFields, state); + } - if (nextTermsHash != null) - nextTermsHash.flush(nextThreadsAndFields, state); + // Clear all state + void reset() { + intPool.reset(); + bytePool.reset(); + + if (primary) { + bytePool.reset(); + } } @Override - synchronized public boolean freeRAM() { + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { + Map childFields = new HashMap(); + Map nextChildFields; + + if (nextTermsHash != null) { + nextChildFields = new HashMap(); + } else { + nextChildFields = null; + } + + for (final Map.Entry entry : fieldsToFlush.entrySet()) { + TermsHashPerField perField = (TermsHashPerField) entry.getValue(); + childFields.put(entry.getKey(), perField.consumer); + if (nextTermsHash != null) { + nextChildFields.put(entry.getKey(), perField.nextPerField); + } + } + + consumer.flush(childFields, state); + + if (nextTermsHash != null) { + nextTermsHash.flush(nextChildFields, state); + } + } + + @Override + InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { + return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo); + } + + @Override + public boolean freeRAM() { return false; } + + @Override + void finishDocument() throws IOException { + try { + consumer.finishDocument(this); + } finally { + if (nextTermsHash != null) { + nextTermsHash.consumer.finishDocument(nextTermsHash); + } + } + } + + @Override + void startDocument() throws IOException { + consumer.startDocument(); + if (nextTermsHash != null) { + nextTermsHash.consumer.startDocument(); + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java 
b/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java index 6488d332ce8..3ec6ec28a3c 100644 --- a/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java @@ -18,11 +18,12 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class TermsHashConsumer { - abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread); - abstract void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); - } + abstract void startDocument() throws IOException; + abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); +} diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java index 0b3ec241c72..f3d705e4433 100644 --- a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -34,9 +34,10 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { final TermsHashConsumerPerField consumer; + final TermsHash termsHash; + final TermsHashPerField nextPerField; - final TermsHashPerThread perThread; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; TermToBytesRefAttribute termAtt; BytesRef termBytesRef; @@ -52,27 +53,27 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { final FieldInfo fieldInfo; final BytesRefHash bytesHash; - + ParallelPostingsArray postingsArray; private final AtomicLong bytesUsed; - public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { - this.perThread = perThread; - intPool = perThread.intPool; - bytePool = perThread.bytePool; - termBytePool = perThread.termBytePool; - docState = perThread.docState; - bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong(); - + public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) { + intPool = termsHash.intPool; + bytePool = termsHash.bytePool; + termBytePool = termsHash.termBytePool; + docState = termsHash.docState; + this.termsHash = termsHash; + bytesUsed = termsHash.trackAllocations ? 
termsHash.docWriter.bytesUsed + : new AtomicLong(); fieldState = docInverterPerField.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); + this.consumer = termsHash.consumer.addField(this, fieldInfo); PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed); - bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); + bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); streamCount = consumer.getStreamCount(); numPostingInt = 2*streamCount; this.fieldInfo = fieldInfo; - if (nextPerThread != null) - nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); + if (nextTermsHash != null) + nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo); else nextPerField = null; } @@ -80,7 +81,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { void shrinkHash(int targetSize) { // Fully free the bytesHash on each flush but keep the pool untouched // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too - bytesHash.clear(false); + bytesHash.clear(false); } public void reset() { @@ -90,7 +91,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { } @Override - synchronized public void abort() { + public void abort() { reset(); if (nextPerField != null) nextPerField.abort(); @@ -99,14 +100,13 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { public void initReader(ByteSliceReader reader, int termID, int stream) { assert stream < streamCount; int intStart = postingsArray.intStarts[termID]; - final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK; + final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; reader.init(bytePool, postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto+stream]); } - /** Collapse the hash table & sort in-place. */ public int[] sortPostings(Comparator termComp) { return bytesHash.sort(termComp); @@ -124,7 +124,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { nextPerField.start(f); } } - + @Override boolean start(Fieldable[] fields, int count) throws IOException { doCall = consumer.start(fields, count); @@ -143,11 +143,12 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { // First time we are seeing this token since we last // flushed the hash. 
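// A minimal sketch (illustration only; the real constants live in
// DocumentsWriterPerThread and the shift of 13 here is just an assumed example)
// of the block-pool addressing used by initReader() above and by add() just
// below: a flat int address splits into a buffer index and an offset in that buffer.
final class IntBlockAddressSketch {
  static final int INT_BLOCK_SHIFT = 13;                    // assumed example value
  static final int INT_BLOCK_SIZE  = 1 << INT_BLOCK_SHIFT;  // ints per buffer
  static final int INT_BLOCK_MASK  = INT_BLOCK_SIZE - 1;

  // e.g. intStart = postingsArray.intStarts[termID]
  static int bufferIndex(int intStart)   { return intStart >> INT_BLOCK_SHIFT; }
  static int indexInBuffer(int intStart) { return intStart & INT_BLOCK_MASK; }
}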
// Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) intPool.nextBuffer(); - if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) { bytePool.nextBuffer(); + } intUptos = intPool.buffer; intUptoStart = intPool.intUpto; @@ -166,8 +167,8 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { } else { termID = (-termID)-1; int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } } @@ -192,7 +193,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { - termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8); + termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8); docState.maxTermPrefix = termBytesRef.toString(); } finally { termBytesRef.length = saved; @@ -204,7 +205,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { if (termID >= 0) {// New posting bytesHash.byteStart(termID); // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) { + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) { intPool.nextBuffer(); } @@ -229,8 +230,8 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { } else { termID = (-termID)-1; final int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } @@ -278,7 +279,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { if (nextPerField != null) nextPerField.finish(); } - + private static final class PostingsBytesStartArray extends BytesStartArray { private final TermsHashPerField perField; @@ -289,10 +290,10 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { this.perField = perField; this.bytesUsed = bytesUsed; } - + @Override public int[] init() { - if(perField.postingsArray == null) { + if(perField.postingsArray == null) { perField.postingsArray = perField.consumer.createPostingsArray(2); bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); } @@ -312,7 +313,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { @Override public int[] clear() { if(perField.postingsArray != null) { - bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); + bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting())); perField.postingsArray = null; } return null; diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java b/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java deleted file mode 100644 index 1a0c4299781..00000000000 --- 
a/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.ByteBlockPool; - -import java.io.IOException; - -final class TermsHashPerThread extends InvertedDocConsumerPerThread { - - final TermsHash termsHash; - final TermsHashConsumerPerThread consumer; - final TermsHashPerThread nextPerThread; // the secondary is currently consumed by TermVectorsWriter - // see secondary entry point in TermsHashPerField#add(int) - - final IntBlockPool intPool; - final ByteBlockPool bytePool; - final ByteBlockPool termBytePool; - - final boolean primary; - final DocumentsWriter.DocState docState; - - public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) { - docState = docInverterPerThread.docState; - - this.termsHash = termsHash; - this.consumer = termsHash.consumer.addThread(this); - - intPool = new IntBlockPool(termsHash.docWriter); - bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator); // use the allocator from the docWriter which tracks the used bytes - primary = nextTermsHash != null; - if (primary) { - // We are primary - termBytePool = bytePool; - nextPerThread = nextTermsHash.addThread(docInverterPerThread, this); // this will be the primaryPerThread in the secondary - assert nextPerThread != null; - } else { - assert primaryPerThread != null; - termBytePool = primaryPerThread.bytePool; // we are secondary and share the byte pool with the primary - nextPerThread = null; - } - } - - @Override - InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { - return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo); - } - - @Override - synchronized public void abort() { - reset(true); - consumer.abort(); - if (primary) - nextPerThread.abort(); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - if (primary) - nextPerThread.consumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - final DocumentsWriter.DocWriter doc = consumer.finishDocument(); - final DocumentsWriter.DocWriter docFromSecondary = primary? 
nextPerThread.consumer.finishDocument():null; - if (doc == null) - return docFromSecondary; - else { - doc.setNext(docFromSecondary); - return doc; - } - } - - // Clear all state - void reset(boolean recyclePostings) { - intPool.reset(); - bytePool.reset(); - } -} diff --git a/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java b/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java new file mode 100644 index 00000000000..f478f29df1f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java @@ -0,0 +1,85 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc + +/** + * A {@link DocumentsWriterPerThreadPool} implementation that tries to assign an + * indexing thread to the same {@link ThreadState} each time the thread tries to + * obtain a {@link ThreadState}. Once a new {@link ThreadState} is created it is + * associated with the creating thread. Subsequently, if the threads associated + * {@link ThreadState} is not in use it will be associated with the requesting + * thread. Otherwise, if the {@link ThreadState} is used by another thread + * {@link ThreadAffinityDocumentsWriterThreadPool} tries to find the currently + * minimal contended {@link ThreadState}. + */ +public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerThreadPool { + private Map threadBindings = new ConcurrentHashMap(); + + /** + * Creates a new {@link DocumentsWriterPerThreadPool} with max. + * {@link #DEFAULT_MAX_THREAD_STATES} thread states. + */ + public ThreadAffinityDocumentsWriterThreadPool() { + this(DEFAULT_MAX_THREAD_STATES); + } + + public ThreadAffinityDocumentsWriterThreadPool(int maxNumPerThreads) { + super(maxNumPerThreads); + assert getMaxThreadStates() >= 1; + } + + @Override + public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) { + ThreadState threadState = threadBindings.get(requestingThread); + if (threadState != null) { + if (threadState.tryLock()) { + return threadState; + } + } + ThreadState minThreadState = null; + + + /* TODO -- another thread could lock the minThreadState we just got while + we should somehow prevent this. 
*/ + // Find the state that has minimum number of threads waiting + minThreadState = minContendedThreadState(); + if (minThreadState == null || minThreadState.hasQueuedThreads()) { + final ThreadState newState = newThreadState(); // state is already locked if non-null + if (newState != null) { + assert newState.isHeldByCurrentThread(); + threadBindings.put(requestingThread, newState); + return newState; + } else if (minThreadState == null) { + /* + * no new threadState available we just take the minContented one + * This must return a valid thread state since we accessed the + * synced context in newThreadState() above. + */ + minThreadState = minContendedThreadState(); + } + } + assert minThreadState != null: "ThreadState is null"; + + minThreadState.lock(); + return minThreadState; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java new file mode 100644 index 00000000000..a070ce0f8c4 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -0,0 +1,667 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Set; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Comparator; + +/** + * Merges segments of approximately equal size, subject to + * an allowed number of segments per tier. This is similar + * to {@link LogByteSizeMergePolicy}, except this merge + * policy is able to merge non-adjacent segment, and + * separates how many segments are merged at once ({@link + * #setMaxMergeAtOnce}) from how many segments are allowed + * per tier ({@link #setSegmentsPerTier}). This merge + * policy also does not over-merge (ie, cascade merges). + * + *

For normal merging, this policy first computes a + * "budget" of how many segments are allowed to be in the + * index. If the index is over-budget, then the policy + * sorts segments by decreasing size (pro-rating by percent + * deletes), and then finds the least-cost merge. Merge + * cost is measured by a combination of the "skew" of the + * merge (size of largest seg divided by smallest seg), + * total merge size and pct deletes reclaimed, + * so that merges with lower skew, smaller size + * and those reclaiming more deletes, are + * favored. + * + *
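 + * For example, assuming the defaults (10 segments per tier, 10-way normal merges, 2 MB floor): a 100 MB index whose smallest segment is 2 MB is allowed 10 segments at the 2 MB tier (20 MB), + * and the remaining 80 MB fits in 80 / 20 = 4 segments at the next, 20 MB, tier, + * so the budget is 14 segments before any merge is selected. + *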

If a merge will produce a segment that's larger than + * {@link #setMaxMergedSegmentMB}, then the policy will + * merge fewer segments (down to 1 at once, if that one has + * deletions) to keep the segment size under budget. + * + * NOTE: this policy freely merges non-adjacent + * segments; if this is a problem, use {@link + * LogMergePolicy}. + * + *

NOTE: This policy always merges by byte size + * of the segments, always pro-rates by percent deletes, + * and does not apply any maximum segment size during + * optimize (unlike {@link LogByteSizeMergePolicy}). + * + * @lucene.experimental + */ + +// TODO +// - we could try to take into account whether a large +// merge is already running (under CMS) and then bias +// ourselves towards picking smaller merges if so (or, +// maybe CMS should do so) + +public class TieredMergePolicy extends MergePolicy { + + private int maxMergeAtOnce = 10; + private long maxMergedSegmentBytes = 5*1024*1024*1024L; + private int maxMergeAtOnceExplicit = 30; + + private long floorSegmentBytes = 2*1024*1024L; + private double segsPerTier = 10.0; + private double expungeDeletesPctAllowed = 10.0; + private boolean useCompoundFile = true; + private double noCFSRatio = 0.1; + + /** Maximum number of segments to be merged at a time + * during "normal" merging. For explicit merging (eg, + * optimize or expungeDeletes was called), see {@link + * #setMaxMergeAtOnceExplicit}. Default is 10. */ + public TieredMergePolicy setMaxMergeAtOnce(int v) { + if (v < 2) { + throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")"); + } + maxMergeAtOnce = v; + return this; + } + + /** @see #setMaxMergeAtOnce */ + public int getMaxMergeAtOnce() { + return maxMergeAtOnce; + } + + // TODO: should addIndexes do explicit merging, too? And, + // if user calls IW.maybeMerge "explicitly" + + /** Maximum number of segments to be merged at a time, + * during optimize or expungeDeletes. Default is 30. */ + public TieredMergePolicy setMaxMergeAtOnceExplicit(int v) { + if (v < 2) { + throw new IllegalArgumentException("maxMergeAtOnceExplicit must be > 1 (got " + v + ")"); + } + maxMergeAtOnceExplicit = v; + return this; + } + + /** @see #setMaxMergeAtOnceExplicit */ + public int getMaxMergeAtOnceExplicit() { + return maxMergeAtOnceExplicit; + } + + /** Maximum sized segment to produce during + * normal merging. This setting is approximate: the + * estimate of the merged segment size is made by summing + * sizes of to-be-merged segments (compensating for + * percent deleted docs). Default is 5 GB. */ + public TieredMergePolicy setMaxMergedSegmentMB(double v) { + maxMergedSegmentBytes = (long) (v*1024*1024); + return this; + } + + /** @see #setMaxMergedSegmentMB */ + public double getMaxMergedSegmentMB() { + return maxMergedSegmentBytes/1024/1024.; + } + + /** Segments smaller than this are "rounded up" to this + * size, ie treated as equal (floor) size for merge + * selection. This is to prevent frequent flushing of + * tiny segments from allowing a long tail in the index. + * Default is 2 MB. */ + public TieredMergePolicy setFloorSegmentMB(double v) { + if (v <= 0.0) { + throw new IllegalArgumentException("floorSegmentMB must be > 0.0 (got " + v + ")"); + } + floorSegmentBytes = (long) (v*1024*1024); + return this; + } + + /** @see #setFloorSegmentMB */ + public double getFloorSegmentMB() { + return floorSegmentBytes/1024/1024.; + } + + /** When expungeDeletes is called, we only merge away a + * segment if its delete percentage is over this + * threshold. Default is 10%.
*/ + public TieredMergePolicy setExpungeDeletesPctAllowed(double v) { + if (v < 0.0 || v > 100.0) { + throw new IllegalArgumentException("expungeDeletesPctAllowed must be between 0.0 and 100.0 inclusive (got " + v + ")"); + } + expungeDeletesPctAllowed = v; + return this; + } + + /** @see #setExpungeDeletesPctAllowed */ + public double getExpungeDeletesPctAllowed() { + return expungeDeletesPctAllowed; + } + + /** Sets the allowed number of segments per tier. Smaller + * values mean more merging but fewer segments. This should + * be >= {@link #setMaxMergeAtOnce} otherwise you'll hit + * too much merging. Default is 10.0. */ + public TieredMergePolicy setSegmentsPerTier(double v) { + if (v < 2.0) { + throw new IllegalArgumentException("segmentsPerTier must be >= 2.0 (got " + v + ")"); + } + segsPerTier = v; + return this; + } + + /** @see #setSegmentsPerTier */ + public double getSegmentsPerTier() { + return segsPerTier; + } + + /** Sets whether compound file format should be used for + * newly flushed and newly merged segments. Default + * true. */ + public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) { + this.useCompoundFile = useCompoundFile; + return this; + } + + /** @see #setUseCompoundFile */ + public boolean getUseCompoundFile() { + return useCompoundFile; + } + + /** If a merged segment will be more than this percentage + * of the total size of the index, leave the segment as + * non-compound file even if compound file is enabled. + * Set to 1.0 to always use CFS regardless of merge + * size. Default is 0.1. */ + public TieredMergePolicy setNoCFSRatio(double noCFSRatio) { + if (noCFSRatio < 0.0 || noCFSRatio > 1.0) { + throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio); + } + this.noCFSRatio = noCFSRatio; + return this; + } + + /** @see #setNoCFSRatio */ + public double getNoCFSRatio() { + return noCFSRatio; + } + + private class SegmentByteSizeDescending implements Comparator<SegmentInfo> { + public int compare(SegmentInfo o1, SegmentInfo o2) { + try { + final long sz1 = size(o1); + final long sz2 = size(o2); + if (sz1 > sz2) { + return -1; + } else if (sz2 > sz1) { + return 1; + } else { + return o1.name.compareTo(o2.name); + } + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + } + + private final Comparator<SegmentInfo> segmentByteSizeDescending = new SegmentByteSizeDescending(); + + protected static abstract class MergeScore { + abstract double getScore(); + abstract String getExplanation(); + } + + @Override + public MergeSpecification findMerges(SegmentInfos infos) throws IOException { + if (verbose()) { + message("findMerges: " + infos.size() + " segments"); + } + if (infos.size() == 0) { + return null; + } + final Collection<SegmentInfo> merging = writer.get().getMergingSegments(); + final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>(); + + final SegmentInfos infosSorted = new SegmentInfos(); + infosSorted.addAll(infos); + + Collections.sort(infosSorted, segmentByteSizeDescending); + + // Compute total index bytes & print details about the index + long totIndexBytes = 0; + long minSegmentBytes = Long.MAX_VALUE; + for(SegmentInfo info : infosSorted) { + final long segBytes = size(info); + if (verbose()) { + String extra = merging.contains(info) ? " [merging]" : ""; + if (segBytes >= maxMergedSegmentBytes/2.0) { + extra += " [skip: too large]"; + } else if (segBytes < floorSegmentBytes) { + extra += " [floored]"; + } + message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) 
+ " MB" + extra); + } + + minSegmentBytes = Math.min(segBytes, minSegmentBytes); + // Accum total byte size + totIndexBytes += segBytes; + } + + // If we have too-large segments, grace them out + // of the maxSegmentCount: + int tooBigCount = 0; + while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) { + totIndexBytes -= size(infosSorted.get(tooBigCount)); + tooBigCount++; + } + + minSegmentBytes = floorSize(minSegmentBytes); + + // Compute max allowed segs in the index + long levelSize = minSegmentBytes; + long bytesLeft = totIndexBytes; + double allowedSegCount = 0; + while(true) { + final double segCountLevel = bytesLeft / (double) levelSize; + if (segCountLevel < segsPerTier) { + allowedSegCount += Math.ceil(segCountLevel); + break; + } + allowedSegCount += segsPerTier; + bytesLeft -= segsPerTier * levelSize; + levelSize *= maxMergeAtOnce; + } + int allowedSegCountInt = (int) allowedSegCount; + + MergeSpecification spec = null; + + // Cycle to possibly select more than one merge: + while(true) { + + long mergingBytes = 0; + + // Gather eligible segments for merging, ie segments + // not already being merged and not already picked (by + // prior iteration of this loop) for merging: + final SegmentInfos eligible = new SegmentInfos(); + for(int idx = tooBigCount; idx= maxMergedSegmentBytes; + + message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount); + + if (eligible.size() == 0) { + return spec; + } + + if (eligible.size() >= allowedSegCountInt) { + + // OK we are over budget -- find best merge! + MergeScore bestScore = null; + SegmentInfos best = null; + boolean bestTooLarge = false; + long bestMergeBytes = 0; + + // Consider all merge starts: + for(int startIdx = 0;startIdx <= eligible.size()-maxMergeAtOnce; startIdx++) { + + long totAfterMergeBytes = 0; + + final SegmentInfos candidate = new SegmentInfos(); + boolean hitTooLarge = false; + for(int idx = startIdx;idx maxMergedSegmentBytes) { + hitTooLarge = true; + // NOTE: we continue, so that we can try + // "packing" smaller segments into this merge + // to see if we can get closer to the max + // size; this in general is not perfect since + // this is really "bin packing" and we'd have + // to try different permutations. + continue; + } + candidate.add(info); + totAfterMergeBytes += segBytes; + } + + final MergeScore score = score(candidate, hitTooLarge, mergingBytes); + message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.)); + + // If we are already running a max sized merge + // (maxMergeIsRunning), don't allow another max + // sized merge to kick off: + if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) { + best = candidate; + bestScore = score; + bestTooLarge = hitTooLarge; + bestMergeBytes = totAfterMergeBytes; + } + } + + if (best != null) { + if (spec == null) { + spec = new MergeSpecification(); + } + final OneMerge merge = new OneMerge(best); + spec.add(merge); + for(SegmentInfo info : merge.segments) { + toBeMerged.add(info); + } + + if (verbose()) { + message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) 
+ " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : "")); + } + } else { + return spec; + } + } else { + return spec; + } + } + } + + /** Expert: scores one merge; subclasses can override. */ + protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException { + long totBeforeMergeBytes = 0; + long totAfterMergeBytes = 0; + long totAfterMergeBytesFloored = 0; + for(SegmentInfo info : candidate) { + final long segBytes = size(info); + totAfterMergeBytes += segBytes; + totAfterMergeBytesFloored += floorSize(segBytes); + totBeforeMergeBytes += info.sizeInBytes(true); + } + + // Measure "skew" of the merge, which can range + // from 1.0/numSegsBeingMerged (good) to 1.0 + // (poor): + final double skew; + if (hitTooLarge) { + // Pretend the merge has perfect skew; skew doesn't + // matter in this case because this merge will not + // "cascade" and so it cannot lead to N^2 merge cost + // over time: + skew = 1.0/maxMergeAtOnce; + } else { + skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored; + } + + // Strongly favor merges with less skew (smaller + // mergeScore is better): + double mergeScore = skew; + + // Gently favor smaller merges over bigger ones. We + // don't want to make this exponent too large else we + // can end up doing poor merges of small segments in + // order to avoid the large merges: + mergeScore *= Math.pow(totAfterMergeBytes, 0.05); + + // Strongly favor merges that reclaim deletes: + final double nonDelRatio = ((double) totAfterMergeBytes)/totBeforeMergeBytes; + mergeScore *= nonDelRatio; + + final double finalMergeScore = mergeScore; + + return new MergeScore() { + + @Override + public double getScore() { + return finalMergeScore; + } + + @Override + public String getExplanation() { + return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio); + } + }; + } + + @Override + public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set segmentsToOptimize) throws IOException { + if (verbose()) { + message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize); + } + SegmentInfos eligible = new SegmentInfos(); + boolean optimizeMergeRunning = false; + final Collection merging = writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + if (segmentsToOptimize.contains(info)) { + if (!merging.contains(info)) { + eligible.add(info); + } else { + optimizeMergeRunning = true; + } + } + } + + if (eligible.size() == 0) { + return null; + } + + if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) || + (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) { + if (verbose()) { + message("already optimized"); + } + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + message("optimizeMergeRunning=" + optimizeMergeRunning); + } + + int end = eligible.size(); + + MergeSpecification spec = null; + + // Do full merges, first, backwards: + while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) { + if (spec == null) { + spec = new MergeSpecification(); + } + final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + 
end -= maxMergeAtOnceExplicit; + } + + if (spec == null && !optimizeMergeRunning) { + // Do final merge + final int numToMerge = end - maxSegmentCount + 1; + final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end)); + if (verbose()) { + message("add final merge=" + merge.segString(writer.get().getDirectory())); + } + spec = new MergeSpecification(); + spec.add(merge); + } + + return spec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes(SegmentInfos infos) + throws CorruptIndexException, IOException { + if (verbose()) { + message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed); + } + final SegmentInfos eligible = new SegmentInfos(); + final Collection merging = writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount; + if (pctDeletes > expungeDeletesPctAllowed && !merging.contains(info)) { + eligible.add(info); + } + } + + if (eligible.size() == 0) { + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + } + + int start = 0; + MergeSpecification spec = null; + + while(start < eligible.size()) { + long totAfterMergeBytes = 0; + int upto = start; + boolean done = false; + while(upto < start + maxMergeAtOnceExplicit) { + if (upto == eligible.size()) { + done = true; + break; + } + final SegmentInfo info = eligible.get(upto); + final long segBytes = size(info); + if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) { + // TODO: we could be smarter here, eg cherry + // picking smaller merges that'd sum up to just + // around the max size + break; + } + totAfterMergeBytes += segBytes; + upto++; + } + + if (upto == start) { + // Single segment is too big; grace it + start++; + continue; + } + + if (spec == null) { + spec = new MergeSpecification(); + } + + final OneMerge merge = new OneMerge(eligible.range(start, upto)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + start = upto; + if (done) { + break; + } + } + + return spec; + } + + @Override + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + final boolean doCFS; + + if (!useCompoundFile) { + doCFS = false; + } else if (noCFSRatio == 1.0) { + doCFS = true; + } else { + long totalSize = 0; + for (SegmentInfo info : infos) + totalSize += size(info); + + doCFS = size(mergedInfo) <= noCFSRatio * totalSize; + } + return doCFS; + } + + @Override + public void close() { + } + + private boolean isOptimized(SegmentInfo info) + throws IOException { + IndexWriter w = writer.get(); + assert w != null; + boolean hasDeletions = w.numDeletedDocs(info) > 0; + return !hasDeletions && + !info.hasSeparateNorms() && + info.dir == w.getDirectory() && + (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0); + } + + // Segment size in bytes, pro-rated by % deleted + private long size(SegmentInfo info) throws IOException { + final long byteSize = info.sizeInBytes(true); + final int delCount = writer.get().numDeletedDocs(info); + final double delRatio = (info.docCount <= 0 ? 
0.0f : ((double)delCount / (double)info.docCount)); + assert delRatio <= 1.0; + return (long) (byteSize * (1.0-delRatio)); + } + + private long floorSize(long bytes) { + return Math.max(floorSegmentBytes, bytes); + } + + private boolean verbose() { + IndexWriter w = writer.get(); + return w != null && w.verbose(); + } + + private void message(String message) { + if (verbose()) { + writer.get().message("TMP: " + message); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": "); + sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", "); + sb.append("maxMergeAtOnceExplicit=").append(maxMergeAtOnceExplicit).append(", "); + sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes/1024/1024.).append(", "); + sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", "); + sb.append("expungeDeletesPctAllowed=").append(expungeDeletesPctAllowed).append(", "); + sb.append("segmentsPerTier=").append(segsPerTier).append(", "); + sb.append("useCompoundFile=").append(useCompoundFile).append(", "); + sb.append("noCFSRatio=").append(noCFSRatio); + return sb.toString(); + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java index 2a62f2d0f4b..8d16a6c33b0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -152,10 +152,10 @@ public class BlockTermsReader extends FieldsProducer { } protected void readHeader(IndexInput input) throws IOException { - CodecUtil.checkHeader(in, BlockTermsWriter.CODEC_NAME, + CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME, BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT); - dirOffset = in.readLong(); + dirOffset = input.readLong(); } protected void seekDir(IndexInput input, long dirOffset) @@ -239,11 +239,6 @@ public class BlockTermsReader extends FieldsProducer { public TermsEnum terms() throws IOException { return current.iterator(); } - - @Override - public DocValues docValues() throws IOException { - return null; - } } private class FieldReader extends Terms implements Closeable { @@ -848,6 +843,11 @@ public class BlockTermsReader extends FieldsProducer { private void decodeMetaData() throws IOException { //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state); if (!seekPending) { + // TODO: cutover to random-access API + // here.... really stupid that we have to decode N + // wasted term metadata just to get to the N+1th + // that we really need... 
+ // lazily catch up on metadata decode: final int limit = state.termCount; // We must set/incr state.termCount because diff --git a/lucene/src/java/org/apache/lucene/index/codecs/Codec.java b/lucene/src/java/org/apache/lucene/index/codecs/Codec.java index 720ec737723..464f736dc17 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/Codec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; @@ -50,6 +51,14 @@ public abstract class Codec { * returns, it must hold open any files it will need to * use; else, those files may be deleted. */ public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException; + + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return null; + } + + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return null; + } /** * Gathers files associated with this segment diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index 33fd19d732c..c042b7c99d6 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -21,8 +21,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; -import org.apache.lucene.index.values.DocValues; import java.io.IOException; import java.io.Closeable; @@ -39,11 +37,6 @@ public abstract class FieldsConsumer implements Closeable { /** Add a new field */ public abstract TermsConsumer addField(FieldInfo field) throws IOException; - /** Adds a new DocValuesField */ - public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - throw new UnsupportedOperationException("docvalues are not supported"); - } - /** Called when we are done adding everything. */ public abstract void close() throws IOException; @@ -59,18 +52,6 @@ public abstract class FieldsConsumer implements Closeable { final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); termsConsumer.merge(mergeState, terms); } - if (mergeState.fieldInfo.hasDocValues()) { - final DocValues docValues = fieldsEnum.docValues(); - if(docValues == null) { - /* It is actually possible that a fieldInfo has a values type but no values are actually available. - * this can happen if there are already segments without values around. 
- */ - continue; - } - final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); - assert docValuesConsumer != null; - docValuesConsumer.merge(mergeState, docValues); - } } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java index a4ce963828b..d170699d72b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java @@ -37,11 +37,6 @@ public abstract class FieldsProducer extends Fields implements Closeable { public abstract void close() throws IOException; public abstract void loadTermsIndex(int indexDivisor) throws IOException; - @Override - public DocValues docValues(String field) throws IOException { - return null; - } - public static final FieldsProducer EMPTY = new FieldsProducer() { @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java index b75aa478a5a..bef7f11414a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java @@ -184,9 +184,21 @@ public abstract class MultiLevelSkipListReader { } } + /** returns x == 0 ? 0 : Math.floor(Math.log(x) / Math.log(base)) */ + static int log(int x, int base) { + assert base >= 2; + int ret = 0; + long n = base; // needs to be a long to avoid overflow + while (x >= n) { + n *= base; + ret++; + } + return ret; + } + /** Loads the skip levels */ private void loadSkipLevels() throws IOException { - numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0])); + numberOfSkipLevels = log(docCount, skipInterval[0]); if (numberOfSkipLevels > maxNumberOfSkipLevels) { numberOfSkipLevels = maxNumberOfSkipLevels; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java index 46cf791ccb3..476fdd24d91 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java @@ -61,7 +61,7 @@ public abstract class MultiLevelSkipListWriter { this.skipInterval = skipInterval; // calculate the maximum number of skip levels for this document frequency - numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval)); + numberOfSkipLevels = MultiLevelSkipListReader.log(df, skipInterval); // make sure it does not exceed maxSkipLevels if (numberOfSkipLevels > maxSkipLevels) { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java new file mode 100644 index 00000000000..81c11af1a35 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -0,0 +1,59 @@ +package org.apache.lucene.index.codecs; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +import java.io.Closeable; +import java.io.IOException; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.values.DocValues; + +/** + * nocommit - javadoc + * @experimental + * + */ +public abstract class PerDocConsumer implements Closeable{ + /** Adds a new DocValuesField */ + public abstract DocValuesConsumer addValuesField(FieldInfo field) + throws IOException; + + public void merge(MergeState mergeState, PerDocValues producer) + throws IOException { + Iterable fields = producer.fields(); + for (String field : fields) { + mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); + assert mergeState.fieldInfo != null : "FieldInfo for field is null: " + + field; + if (mergeState.fieldInfo.hasDocValues()) { + final DocValues docValues = producer.docValues(field); + if (docValues == null) { + /* + * It is actually possible that a fieldInfo has a values type but no + * values are actually available. this can happen if there are already + * segments without values around. + */ + continue; + } + final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); + assert docValuesConsumer != null; + docValuesConsumer.merge(mergeState, docValues); + } + } + + } +} diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java similarity index 54% rename from lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java rename to lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java index c8bc1641f44..cfeda8f40c9 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java @@ -1,5 +1,4 @@ -package org.apache.lucene.index; - +package org.apache.lucene.index.codecs; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,12 +15,30 @@ package org.apache.lucene.index; * See the License for the specific language governing permissions and * limitations under the License. */ - +import java.io.Closeable; import java.io.IOException; +import java.util.Collection; -abstract class DocFieldConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void abort(); +import org.apache.lucene.index.values.DocValues; + +/** + * + * nocommit javadoc + * @experimental + */ +public abstract class PerDocValues implements Closeable { + /** + * Returns {@link DocValues} for the current field. + * + * @param field + * the field name + * @return the {@link DocValues} for this field or null if not + * applicable. 
+ * @throws IOException + */ + public abstract DocValues docValues(String field) throws IOException; + + public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0]; + + public abstract Collection fields(); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java index 6980335aeea..5f8d3aa2db6 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java @@ -18,6 +18,9 @@ package org.apache.lucene.index.codecs; */ import java.io.IOException; +import java.io.FileOutputStream; // for toDot +import java.io.OutputStreamWriter; // for toDot +import java.io.Writer; // for toDot import java.util.Collection; import java.util.HashMap; import java.util.Iterator; @@ -34,6 +37,7 @@ import org.apache.lucene.util.automaton.fst.Builder; import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum; import org.apache.lucene.util.automaton.fst.FST; import org.apache.lucene.util.automaton.fst.PositiveIntOutputs; +import org.apache.lucene.util.automaton.fst.Util; // for toDot /** See {@link VariableGapTermsIndexWriter} * @@ -52,11 +56,12 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { // start of the field info data protected long dirOffset; + final String segment; public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, int codecId) throws IOException { in = dir.openInput(IndexFileNames.segmentFileName(segment, ""+codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); - + this.segment = segment; boolean success = false; try { @@ -153,15 +158,11 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { private final class FieldIndexData { - private final FieldInfo fieldInfo; private final long indexStart; - // Set only if terms index is loaded: private volatile FST fst; public FieldIndexData(FieldInfo fieldInfo, long indexStart) throws IOException { - - this.fieldInfo = fieldInfo; this.indexStart = indexStart; if (indexDivisor > 0) { @@ -176,6 +177,14 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { fst = new FST(clone, fstOutputs); clone.close(); + /* + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(fst, w, false, false); + System.out.println("FST INDEX: SAVED to " + dotFileName); + w.close(); + */ + if (indexDivisor > 1) { // subsample final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 335b0d849f4..60ddccaa52d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -18,27 +18,22 @@ package org.apache.lucene.index.codecs.docvalues; */ import java.io.IOException; import java.util.Comparator; -import java.util.Iterator; import java.util.Set; -import java.util.Map.Entry; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.PerDocWriteState; import 
org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; -import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; /** @@ -60,80 +55,46 @@ public class DocValuesCodec extends Codec { } @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) + public PerDocConsumer docsConsumer(final PerDocWriteState state) throws IOException { - final WrappingFieldsConsumer consumer; - consumer = new WrappingFieldsConsumer(other, comparator, state); - // nocommit this is a hack and only necessary since - // we want to initialized the wrapped - // fieldsConsumer lazily with a SegmentWriteState created after the docvalue - // ones is. We should fix this in DocumentWriter I guess. See - // DocFieldProcessor too! - return consumer; + return new PerDocConsumer() { + + @Override + public void close() throws IOException { + } + + @Override + public DocValuesConsumer addValuesField(FieldInfo field) + throws IOException { + final DocValuesConsumer consumer = Writer.create(field.getDocValues(), + docValuesId(state.segmentName, state.codecId, field.number), + // TODO can we have a compound file per segment and codec for + // docvalues? + state.directory, comparator, state.bytesUsed); + return consumer; + } + }; } - private static class WrappingFieldsConsumer extends FieldsConsumer { - private final SegmentWriteState state; - private FieldsConsumer wrappedConsumer; - private final Codec other; - private final Comparator comparator; - - public WrappingFieldsConsumer(Codec other, Comparator comparator, SegmentWriteState state) { - this.other = other; - this.comparator = comparator; - this.state = state; + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + try { + return new DocValuesProducerBase(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); + }catch (IOException e) { + return new DocValuesProducerBase(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } + } - @Override - public void close() throws IOException { - synchronized (this) { - if (wrappedConsumer != null) { - wrappedConsumer.close(); - } - } - - } - - @Override - public synchronized DocValuesConsumer addValuesField(FieldInfo field) - throws IOException { - final DocValuesConsumer consumer = Writer.create(field.getDocValues(), docValuesId(state.segmentName, state.codecId, field.number), - // TODO can we have a compound file per segment and codec for - // docvalues? 
- state.directory, comparator, state.bytesUsed); - return consumer; - } - - @Override - public TermsConsumer addField(FieldInfo field) throws IOException { - synchronized (this) { - if (wrappedConsumer == null) - wrappedConsumer = other.fieldsConsumer(state); - } - return wrappedConsumer.addField(field); - } + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) + throws IOException { + return other.fieldsConsumer(state); } @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - final FieldInfos fieldInfos = state.fieldInfos; - boolean indexed = false; - boolean docValues = false; - for (FieldInfo fieldInfo : fieldInfos) { - if (fieldInfo.getCodecId() == state.codecId) { - indexed |= fieldInfo.isIndexed; - docValues |= fieldInfo.hasDocValues(); - if (indexed && docValues) - break; - } - } - final FieldsProducer indexedProducer = indexed ? other.fieldsProducer(state) : FieldsProducer.EMPTY; - if (docValues) { - return new WrappingFielsdProducer(state, indexedProducer); - } else { - return FieldsProducer.EMPTY; - } + return other.fieldsProducer(state); } static String docValuesId(String segmentsName, int codecID, int fieldId) { @@ -187,140 +148,4 @@ public class DocValuesCodec extends Codec { extensions.add(Writer.DATA_EXTENSION); extensions.add(Writer.INDEX_EXTENSION); } - - static class WrappingFielsdProducer extends DocValuesProducerBase { - - private final FieldsProducer other; - - WrappingFielsdProducer(SegmentReadState state, FieldsProducer other) - throws IOException { - super(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); - this.other = other; - } - - @Override - public void close() throws IOException { - try { - other.close(); - } finally { - super.close(); - } - } - - @Override - public void loadTermsIndex(int indexDivisor) throws IOException { - other.loadTermsIndex(indexDivisor); - } - - @Override - public FieldsEnum iterator() throws IOException { - return new WrappingFieldsEnum(other.iterator(), docValues.entrySet() - .iterator()); - } - - @Override - public Terms terms(String field) throws IOException { - return other.terms(field); - } - } - - static abstract class NameValue { - String name; - V value; - - NameValue smaller(NameValue other) throws IOException { - if (other.name == null) { - if (this.name == null) { - return null; - } - return this; - } else if (this.name == null) { - return other; - } - final int res = this.name.compareTo(other.name); - if (res < 0) - return this; - if (res == 0) - other.name = this.name; - return other; - } - - abstract NameValue next() throws IOException; - } - - static class FieldsEnumNameValue extends NameValue { - @Override - NameValue next() throws IOException { - name = value.next(); - return this; - } - } - - static class DocValueNameValue extends NameValue { - Iterator> iter; - - @Override - NameValue next() { - if (iter.hasNext()) { - Entry next = iter.next(); - value = next.getValue(); - name = next.getKey(); - } else { - name = null; - } - return this; - } - } - - static class WrappingFieldsEnum extends FieldsEnum { - private final DocValueNameValue docValues = new DocValueNameValue(); - private final NameValue fieldsEnum = new FieldsEnumNameValue(); - private NameValue coordinator; - - @Override - public AttributeSource attributes() { - return fieldsEnum.value.attributes(); - } - - public WrappingFieldsEnum(FieldsEnum wrapped, - Iterator> docValues) { - this.docValues.iter = docValues; - this.fieldsEnum.value = wrapped; - coordinator = null; - } - - 
@Override - public DocValues docValues() throws IOException { - if (docValues.name == coordinator.name) - return docValues.value; - return null; - } - - @Override - public String next() throws IOException { - if (coordinator == null) { - coordinator = fieldsEnum.next().smaller(docValues.next()); - } else { - String current = coordinator.name; - if (current == docValues.name) { - docValues.next(); - } - if (current == fieldsEnum.name) { - fieldsEnum.next(); - } - coordinator = docValues.smaller(fieldsEnum); - - } - return coordinator == null ? null : coordinator.name; - } - - @Override - public TermsEnum terms() throws IOException { - if (fieldsEnum.name == coordinator.name) { - return fieldsEnum.value.terms(); - } - return null; - } - } - } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index 619027b4248..bb6c8556a3a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -23,7 +23,7 @@ import java.util.TreeMap; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; @@ -37,7 +37,7 @@ import org.apache.lucene.store.Directory; * * @lucene.experimental */ -public abstract class DocValuesProducerBase extends FieldsProducer { +public class DocValuesProducerBase extends PerDocValues { protected final TreeMap docValues = new TreeMap(); @@ -145,4 +145,9 @@ public abstract class DocValuesProducerBase extends FieldsProducer { throw ex; } } + + @Override + public Collection fields() { + return docValues.keySet(); + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java index d541cd19ca0..28b19a52090 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java @@ -37,7 +37,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.FieldsProducer; -import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; @@ -236,12 +235,6 @@ public class PreFlexFields extends FieldsProducer { termsEnum.reset(current); return termsEnum; } - - @Override - public DocValues docValues() throws IOException { - //DocValues are not available on PreFlex indices - return null; - } } private class PreTerms extends Terms { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java index 8205e73b972..c5fd09b7741 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java @@ -310,7 +310,7 @@ public final class TermInfosReader { } } else { assert sameTermInfo(ti, tiOrd, enumerator); - assert (int) 
enumerator.position == tiOrd.termOrd; + assert enumerator.position == tiOrd.termOrd; } } else { ti = null; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java index f5d6aba7bf5..4b42caa244b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java @@ -233,6 +233,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { private Bits skipDocs; private int docID; private int freq; + private int payloadLength; public PulsingDocsEnum(FieldInfo fieldInfo) { omitTF = fieldInfo.omitTermFreqAndPositions; @@ -246,6 +247,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize); postings.reset(bytes); docID = 0; + payloadLength = 0; freq = 1; this.skipDocs = skipDocs; return this; @@ -277,7 +279,6 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { // Skip positions if (storePayloads) { - int payloadLength = -1; for(int pos=0;pos>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { + //System.out.println(" read freq"); freq = postings.readVInt(); // else read freq } posPending = freq; @@ -400,10 +403,12 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { @Override public int advance(int target) throws IOException { + //System.out.println("PR.advance target=" + target); int doc; while((doc=nextDoc()) != NO_MORE_DOCS) { + //System.out.println(" nextDoc got doc=" + doc); if (doc >= target) { - return doc; + return docID = doc; } } return docID = NO_MORE_DOCS; @@ -411,7 +416,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { @Override public int nextPosition() throws IOException { - //System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq); + //System.out.println("PR.nextPosition posPending=" + posPending + " vs freq=" + freq); assert posPending > 0; posPending--; @@ -421,6 +426,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { //System.out.println("PR skip payload=" + payloadLength); postings.skipBytes(payloadLength); } + //System.out.println(" read pos code"); final int code = postings.readVInt(); //System.out.println("PR code=" + code); if ((code & 1) != 0) { @@ -433,16 +439,17 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { position += postings.readVInt(); } - //System.out.println("PR d&p nextPos return pos=" + position + " this=" + this); + //System.out.println(" return pos=" + position + " hasPayload=" + !payloadRetrieved + " posPending=" + posPending + " this=" + this); return position; } private void skipPositions() throws IOException { + //System.out.println("PR.skipPositions: posPending=" + posPending); while(posPending != 0) { nextPosition(); } if (storePayloads && !payloadRetrieved) { - //System.out.println(" skip payload len=" + payloadLength); + //System.out.println(" skip last payload len=" + payloadLength); postings.skipBytes(payloadLength); payloadRetrieved = true; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java index 35b2a3d0278..b5b4ce728d2 100644 --- 
a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java @@ -201,6 +201,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { if (!omitTF) { int lastDocID = 0; int pendingIDX = 0; + int lastPayloadLength = -1; while(pendingIDX < pendingCount) { final Position doc = pending[pendingIDX]; @@ -217,7 +218,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { } int lastPos = 0; - int lastPayloadLength = -1; for(int posIDX=0;posIDX { Query query = c.getQuery().rewrite(reader); // rewrite first if (getBoost() != 1.0f) { // incorporate boost - if (query == c.getQuery()) // if rewrite was no-op + if (query == c.getQuery()) { // if rewrite was no-op query = (Query)query.clone(); // then clone before boost + } + // Since the BooleanQuery only has 1 clause, the BooleanQuery will be + // written out. Therefore the rewritten Query's boost must incorporate both + // the clause's boost, and the boost of the BooleanQuery itself query.setBoost(getBoost() * query.getBoost()); } @@ -381,8 +385,12 @@ public class BooleanQuery extends Query implements Iterable { BooleanClause c = clauses.get(i); Query query = c.getQuery().rewrite(reader); if (query != c.getQuery()) { // clause rewrote: must clone - if (clone == null) + if (clone == null) { + // The BooleanQuery clone is lazily initialized so only initialize + // it if a rewritten clause differs from the original clause (and hasn't been + // initialized already). If nothing differs, the clone isn't needlessly created clone = (BooleanQuery)this.clone(); + } clone.clauses.set(i, new BooleanClause(query, c.getOccur())); } } diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java index 1e319b24cc7..f199edc92c6 100644 --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java @@ -857,4 +857,9 @@ public class IndexSearcher { this.leaves = leaves; } } + + @Override + public String toString() { + return "IndexSearcher(" + reader + ")"; + } } diff --git a/lucene/src/java/org/apache/lucene/util/AttributeSource.java b/lucene/src/java/org/apache/lucene/util/AttributeSource.java index 9833fdd61c0..631de15f3a5 100644 --- a/lucene/src/java/org/apache/lucene/util/AttributeSource.java +++ b/lucene/src/java/org/apache/lucene/util/AttributeSource.java @@ -93,10 +93,33 @@ public class AttributeSource { } } + /** + * This class holds the state of an AttributeSource. + * @see #captureState + * @see #restoreState + */ + public static final class State implements Cloneable { + AttributeImpl attribute; + State next; + + @Override + public Object clone() { + State clone = new State(); + clone.attribute = (AttributeImpl) attribute.clone(); + + if (next != null) { + clone.next = (State) next.clone(); + } + + return clone; + } + } + // These two maps must always be in sync!!! 
// So they are private, final and read-only from the outside (read-only iterators) private final Map, AttributeImpl> attributes; private final Map, AttributeImpl> attributeImpls; + private final State[] currentState; private AttributeFactory factory; @@ -116,6 +139,7 @@ public class AttributeSource { } this.attributes = input.attributes; this.attributeImpls = input.attributeImpls; + this.currentState = input.currentState; this.factory = input.factory; } @@ -125,6 +149,7 @@ public class AttributeSource { public AttributeSource(AttributeFactory factory) { this.attributes = new LinkedHashMap, AttributeImpl>(); this.attributeImpls = new LinkedHashMap, AttributeImpl>(); + this.currentState = new State[1]; this.factory = factory; } @@ -147,11 +172,8 @@ public class AttributeSource { * if one instance implements more than one Attribute interface. */ public final Iterator getAttributeImplsIterator() { - if (hasAttributes()) { - if (currentState == null) { - computeCurrentState(); - } - final State initState = currentState; + final State initState = getCurrentState(); + if (initState != null) { return new Iterator() { private State state = initState; @@ -225,7 +247,7 @@ public class AttributeSource { // Attribute is a superclass of this interface if (!attributes.containsKey(curInterface)) { // invalidate state to force recomputation in captureState() - this.currentState = null; + this.currentState[0] = null; attributes.put(curInterface, att); attributeImpls.put(clazz, att); } @@ -283,41 +305,21 @@ public class AttributeSource { } return attClass.cast(attImpl); } - - /** - * This class holds the state of an AttributeSource. - * @see #captureState - * @see #restoreState - */ - public static final class State implements Cloneable { - AttributeImpl attribute; - State next; - @Override - public Object clone() { - State clone = new State(); - clone.attribute = (AttributeImpl) attribute.clone(); - - if (next != null) { - clone.next = (State) next.clone(); - } - - return clone; + private State getCurrentState() { + State s = currentState[0]; + if (s != null || !hasAttributes()) { + return s; } - } - - private State currentState = null; - - private void computeCurrentState() { - currentState = new State(); - State c = currentState; + State c = s = currentState[0] = new State(); final Iterator it = attributeImpls.values().iterator(); c.attribute = it.next(); while (it.hasNext()) { c.next = new State(); c = c.next; c.attribute = it.next(); - } + } + return s; } /** @@ -325,13 +327,8 @@ public class AttributeSource { * {@link AttributeImpl#clear()} on each Attribute implementation. */ public final void clearAttributes() { - if (hasAttributes()) { - if (currentState == null) { - computeCurrentState(); - } - for (State state = currentState; state != null; state = state.next) { - state.attribute.clear(); - } + for (State state = getCurrentState(); state != null; state = state.next) { + state.attribute.clear(); } } @@ -340,14 +337,8 @@ public class AttributeSource { * {@link #restoreState} to restore the state of this or another AttributeSource. */ public final State captureState() { - if (!hasAttributes()) { - return null; - } - - if (currentState == null) { - computeCurrentState(); - } - return (State) this.currentState.clone(); + final State state = this.getCurrentState(); + return (state == null) ? 
null : (State) state.clone(); } /** @@ -382,15 +373,9 @@ public class AttributeSource { @Override public int hashCode() { int code = 0; - if (hasAttributes()) { - if (currentState == null) { - computeCurrentState(); - } - for (State state = currentState; state != null; state = state.next) { - code = code * 31 + state.attribute.hashCode(); - } + for (State state = getCurrentState(); state != null; state = state.next) { + code = code * 31 + state.attribute.hashCode(); } - return code; } @@ -413,14 +398,8 @@ public class AttributeSource { } // it is only equal if all attribute impls are the same in the same order - if (this.currentState == null) { - this.computeCurrentState(); - } - State thisState = this.currentState; - if (other.currentState == null) { - other.computeCurrentState(); - } - State otherState = other.currentState; + State thisState = this.getCurrentState(); + State otherState = other.getCurrentState(); while (thisState != null && otherState != null) { if (otherState.attribute.getClass() != thisState.attribute.getClass() || !otherState.attribute.equals(thisState.attribute)) { return false; @@ -473,13 +452,8 @@ public class AttributeSource { * @see AttributeImpl#reflectWith */ public final void reflectWith(AttributeReflector reflector) { - if (hasAttributes()) { - if (currentState == null) { - computeCurrentState(); - } - for (State state = currentState; state != null; state = state.next) { - state.attribute.reflectWith(reflector); - } + for (State state = getCurrentState(); state != null; state = state.next) { + state.attribute.reflectWith(reflector); } } @@ -495,10 +469,7 @@ public class AttributeSource { if (hasAttributes()) { // first clone the impls - if (currentState == null) { - computeCurrentState(); - } - for (State state = currentState; state != null; state = state.next) { + for (State state = getCurrentState(); state != null; state = state.next) { clone.attributeImpls.put(state.attribute.getClass(), (AttributeImpl) state.attribute.clone()); } @@ -520,18 +491,13 @@ public class AttributeSource { * {@link #cloneAttributes} instead of {@link #captureState}. 
*/ public final void copyTo(AttributeSource target) { - if (hasAttributes()) { - if (currentState == null) { - computeCurrentState(); - } - for (State state = currentState; state != null; state = state.next) { - final AttributeImpl targetImpl = target.attributeImpls.get(state.attribute.getClass()); - if (targetImpl == null) { - throw new IllegalArgumentException("This AttributeSource contains AttributeImpl of type " + - state.attribute.getClass().getName() + " that is not in the target"); - } - state.attribute.copyTo(targetImpl); + for (State state = getCurrentState(); state != null; state = state.next) { + final AttributeImpl targetImpl = target.attributeImpls.get(state.attribute.getClass()); + if (targetImpl == null) { + throw new IllegalArgumentException("This AttributeSource contains AttributeImpl of type " + + state.attribute.getClass().getName() + " that is not in the target"); } + state.attribute.copyTo(targetImpl); } } diff --git a/lucene/src/java/org/apache/lucene/util/CodecUtil.java b/lucene/src/java/org/apache/lucene/util/CodecUtil.java index 93aa5dc7aa8..f4caa2715b1 100644 --- a/lucene/src/java/org/apache/lucene/util/CodecUtil.java +++ b/lucene/src/java/org/apache/lucene/util/CodecUtil.java @@ -18,8 +18,8 @@ package org.apache.lucene.util; */ -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; @@ -35,18 +35,16 @@ public final class CodecUtil { private final static int CODEC_MAGIC = 0x3fd76c17; - public static IndexOutput writeHeader(IndexOutput out, String codec, int version) + public static DataOutput writeHeader(DataOutput out, String codec, int version) throws IOException { - final long start = out.getFilePointer(); + BytesRef bytes = new BytesRef(codec); + if (bytes.length != codec.length() || bytes.length >= 128) { + throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); + } out.writeInt(CODEC_MAGIC); out.writeString(codec); out.writeInt(version); - // We require this so we can easily pre-compute header length - if (out.getFilePointer()-start != codec.length()+9) { - throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); - } - return out; } @@ -54,7 +52,7 @@ public final class CodecUtil { return 9+codec.length(); } - public static int checkHeader(IndexInput in, String codec, int minVersion, int maxVersion) + public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion) throws IOException { // Safety to guard against reading a bogus string: diff --git a/lucene/src/java/org/apache/lucene/util/PriorityQueue.java b/lucene/src/java/org/apache/lucene/util/PriorityQueue.java index eebc7996256..38252bfa03c 100644 --- a/lucene/src/java/org/apache/lucene/util/PriorityQueue.java +++ b/lucene/src/java/org/apache/lucene/util/PriorityQueue.java @@ -21,8 +21,10 @@ package org.apache.lucene.util; * least element can always be found in constant time. Put()'s and pop()'s * require log(size) time. * - *

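The hunks below rewrite this javadoc around the PriorityQueue(int,boolean) constructor and its sentinel objects. As a hedged sketch of the contract being documented (illustrative only; ScoreSlot, TopScores and the commented usage are made-up names, not Lucene APIs):

// Illustrative sketch of the sentinel/prepopulate contract documented below;
// not part of this patch.
final class ScoreSlot {
  float score = Float.NEGATIVE_INFINITY;   // sentinel value: worse than any real score
}

final class TopScores extends org.apache.lucene.util.PriorityQueue<ScoreSlot> {
  TopScores(int maxSize) {
    super(maxSize, true);                  // prepopulate=true: queue starts filled with sentinels
  }
  @Override
  protected boolean lessThan(ScoreSlot a, ScoreSlot b) {
    return a.score < b.score;              // least (worst) element stays on top
  }
  @Override
  protected ScoreSlot getSentinelObject() {
    return new ScoreSlot();                // must return a new instance on every call
  }
}

// Because the queue is pre-filled, a caller never needs add(); it only
// overwrites the top and re-heapifies (hypothetical usage):
//   TopScores pq = new TopScores(10);
//   if (score > pq.top().score) { pq.top().score = score; pq.updateTop(); }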
NOTE: This class pre-allocates a full array of - * length maxSize+1, in {@link #initialize}. + *

NOTE: This class will pre-allocate a full array of + * length maxSize+1 if instantiated via the + * {@link #PriorityQueue(int,boolean)} constructor with + * prepopulate set to true. * * @lucene.internal */ @@ -83,9 +85,10 @@ public abstract class PriorityQueue { /** * This method can be overridden by extending classes to return a sentinel - * object which will be used by {@link #initialize(int)} to fill the queue, so - * that the code which uses that queue can always assume it's full and only - * change the top without attempting to insert any new object.
+ * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)} + * constructor to fill the queue, so that the code which uses that queue can always + * assume it's full and only change the top without attempting to insert any new + * object.
* * Those sentinel values should always compare worse than any non-sentinel * value (i.e., {@link #lessThan} should always favor the @@ -111,11 +114,11 @@ public abstract class PriorityQueue { * * * NOTE: if this method returns a non-null value, it will be called by - * {@link #initialize(int)} {@link #size()} times, relying on a new object to - * be returned and will not check if it's null again. Therefore you should - * ensure any call to this method creates a new instance and behaves - * consistently, e.g., it cannot return null if it previously returned - * non-null. + * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor + * {@link #size()} times, relying on a new object to be returned and will not + * check if it's null again. Therefore you should ensure any call to this + * method creates a new instance and behaves consistently, e.g., it cannot + * return null if it previously returned non-null. * * @return the sentinel object to use to pre-populate the queue, or null if * sentinel objects are not supported. diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java index 4055d3531d2..fed8cd21098 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java @@ -180,7 +180,13 @@ public class Builder { compileAllTargets(node); } final T nextFinalOutput = node.output; - final boolean isFinal = node.isFinal; + + // We "fake" the node as being final if it has no + // outgoing arcs; in theory we could leave it + // as non-final (the FST can represent this), but + // FSTEnum, Util, etc., have trouble w/ non-final + // dead-end states: + final boolean isFinal = node.isFinal || node.numArcs == 0; if (doCompile) { // this node makes it and we now compile it. first, @@ -219,7 +225,7 @@ public class Builder { add(scratchIntsRef, output); } - /** Sugar: adds the UTF32 chars from char[] slice. FST + /** Sugar: adds the UTF32 codepoints from char[] slice. FST * must be FST.INPUT_TYPE.BYTE4! */ public void add(char[] s, int offset, int length, T output) throws IOException { assert fst.getInputType() == FST.INPUT_TYPE.BYTE4; @@ -237,7 +243,7 @@ public class Builder { add(scratchIntsRef, output); } - /** Sugar: adds the UTF32 chars from CharSequence. FST + /** Sugar: adds the UTF32 codepoints from CharSequence. FST * must be FST.INPUT_TYPE.BYTE4! 
*/ public void add(CharSequence s, T output) throws IOException { assert fst.getInputType() == FST.INPUT_TYPE.BYTE4; @@ -268,6 +274,7 @@ public class Builder { // 'finalness' is stored on the incoming arc, not on // the node frontier[0].inputCount++; + frontier[0].isFinal = true; fst.setEmptyOutput(output); return; } @@ -388,6 +395,10 @@ public class Builder { if (!arc.target.isCompiled()) { // not yet compiled @SuppressWarnings("unchecked") final UnCompiledNode n = (UnCompiledNode) arc.target; + if (n.numArcs == 0) { + //System.out.println("seg=" + segment + " FORCE final arc=" + (char) arc.label); + arc.isFinal = n.isFinal = true; + } arc.target = compileNode(n); } } diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java index 98a654cbed2..dde66270873 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java @@ -21,12 +21,14 @@ import java.io.IOException; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode; +// NOTE: while the FST is able to represent a non-final +// dead-end state (NON_FINAL_END_NODE=0), the layres above +// (FSTEnum, Util) have problems with this!! + /** Represents an FST using a compact byte[] format. *
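// Illustrative sketch (not part of the patch) of the "sugar" add() overloads
// documented above. It assumes a Builder created with FST.INPUT_TYPE.BYTE4
// and NoOutputs (nothing attached to each input); terms must still be added
// in sorted order, and builder.finish() produces the FST afterwards.
// addWords() is a made-up helper name.
void addWords(Builder<Object> builder) throws IOException {
  final Object noOutput = NoOutputs.getSingleton().getNoOutput();
  builder.add("dog", noOutput);    // each CharSequence is decoded to UTF-32 ints
  builder.add("dogs", noOutput);
  builder.add("dot", noOutput);
}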

The format is similar to what's used by Morfologik * (http://sourceforge.net/projects/morfologik). @@ -168,7 +170,7 @@ public class FST { } // create an existing FST - public FST(IndexInput in, Outputs outputs) throws IOException { + public FST(DataInput in, Outputs outputs) throws IOException { this.outputs = outputs; writer = null; CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_START); @@ -216,6 +218,9 @@ public class FST { } void finish(int startNode) { + if (startNode == FINAL_END_NODE && emptyOutput != null) { + startNode = 0; + } if (this.startNode != -1) { throw new IllegalStateException("already finished"); } @@ -250,11 +255,13 @@ public class FST { writer.posWrite = posSave; } - public void save(IndexOutput out) throws IOException { + public void save(DataOutput out) throws IOException { if (startNode == -1) { throw new IllegalStateException("call finish first"); } CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT); + // TODO: really we should encode this as an arc, arriving + // to the root node, instead of special casing here: if (emptyOutput != null) { out.writeByte((byte) 1); out.writeVInt(emptyOutputBytes.length); @@ -468,7 +475,9 @@ public class FST { arc.nextFinalOutput = emptyOutput; } else { arc.flags = BIT_LAST_ARC; + arc.nextFinalOutput = NO_OUTPUT; } + arc.output = NO_OUTPUT; // If there are no nodes, ie, the FST only accepts the // empty string, then startNode is 0, and then readFirstTargetArc @@ -585,12 +594,11 @@ public class FST { * expanded array format. */ boolean isExpandedTarget(Arc follow) throws IOException { - if (follow.isFinal()) { + if (!targetHasArcs(follow)) { return false; } else { final BytesReader in = getBytesReader(follow.target); final byte b = in.readByte(); - return (b & BIT_ARCS_AS_FIXED_ARRAY) != 0; } } @@ -669,8 +677,11 @@ public class FST { } if (arc.flag(BIT_STOP_NODE)) { - arc.target = FINAL_END_NODE; - arc.flags |= BIT_FINAL_ARC; + if (arc.flag(BIT_FINAL_ARC)) { + arc.target = FINAL_END_NODE; + } else { + arc.target = NON_FINAL_END_NODE; + } arc.nextArc = in.pos; } else if (arc.flag(BIT_TARGET_NEXT)) { arc.nextArc = in.pos; diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index aac035114f8..4d4141c6ab3 100644 --- a/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -19,11 +19,15 @@ package org.apache.lucene.analysis; import java.io.StringReader; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * Base class for all Lucene unit tests that use TokenStreams. 
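// Illustrative sketch (not part of the patch): the widened signatures above,
// save(DataOutput) and the FST(DataInput, Outputs) constructor, allow a plain
// round trip through any Directory. Assumes dir, outputs and a finished fst
// already exist, and uses the single-argument createOutput/openInput calls of
// this codebase; "kb.fst" is an arbitrary file name.
IndexOutput out = dir.createOutput("kb.fst");
fst.save(out);                                   // IndexOutput is a DataOutput
out.close();
IndexInput in = dir.openInput("kb.fst");
FST<Object> loaded = new FST<Object>(in, outputs);   // checkHeader runs here
in.close();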
@@ -117,11 +121,24 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertEquals("type "+i, types[i], typeAtt.type()); if (posIncrements != null) assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement()); + + // we can enforce some basic things about a few attributes even if the caller doesn't check: + if (offsetAtt != null) { + assertTrue("startOffset must be >= 0", offsetAtt.startOffset() >= 0); + assertTrue("endOffset must be >= 0", offsetAtt.endOffset() >= 0); + assertTrue("endOffset must be >= startOffset", offsetAtt.endOffset() >= offsetAtt.startOffset()); + } + if (posIncrAtt != null) { + assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0); + } } assertFalse("end of stream", ts.incrementToken()); ts.end(); if (finalOffset != null) assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset()); + if (offsetAtt != null) { + assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0); + } ts.close(); } @@ -216,4 +233,39 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertAnalyzesToReuse(a, input, new String[]{expected}); } + // simple utility method for blasting tokenstreams with data to make sure they don't do anything crazy + + public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException { + checkRandomData(random, a, iterations, 20); + } + + public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException { + for (int i = 0; i < iterations; i++) { + String text; + switch(_TestUtil.nextInt(random, 0, 3)) { + case 0: + text = _TestUtil.randomSimpleString(random); + break; + case 1: + text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength); + break; + default: + text = _TestUtil.randomUnicodeString(random, maxWordLength); + } + + TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text)); + assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class)); + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); + List tokens = new ArrayList(); + ts.reset(); + while (ts.incrementToken()) { + tokens.add(termAtt.toString()); + // TODO: we could collect offsets etc here for better checking that reset() really works. 
+ } + ts.close(); + // verify reusing is "reproducable" and also get the normal tokenstream sanity checks + if (!tokens.isEmpty()) + assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()])); + } + } } diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java index a99a3435b0e..ae889c1c3b1 100644 --- a/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java +++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java @@ -19,10 +19,10 @@ package org.apache.lucene.analysis; import java.io.IOException; import java.io.Reader; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.index.Payload; import org.apache.lucene.util.automaton.CharacterRunAutomaton; /** @@ -33,67 +33,50 @@ public final class MockAnalyzer extends Analyzer { private final boolean lowerCase; private final CharacterRunAutomaton filter; private final boolean enablePositionIncrements; - private final boolean payload; private int positionIncrementGap; - - /** - * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) - * MockAnalyzer(runAutomaton, lowerCase, filter, enablePositionIncrements, true}). - */ - public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) { - this(runAutomaton, lowerCase, filter, enablePositionIncrements, true); - } + private final Random random; + private Map previousMappings = new HashMap(); /** * Creates a new MockAnalyzer. * + * @param random Random for payloads behavior * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+) * @param lowerCase true if the tokenizer should lowercase terms * @param filter DFA describing how terms should be filtered (set of stopwords, etc) * @param enablePositionIncrements true if position increments should reflect filtered terms. - * @param payload if payloads should be added containing the positions (for testing) */ - public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements, boolean payload) { + public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) { + this.random = random; this.runAutomaton = runAutomaton; this.lowerCase = lowerCase; this.filter = filter; this.enablePositionIncrements = enablePositionIncrements; - this.payload = payload; } /** - * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) - * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true}). + * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean) + * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false}). 
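// Illustrative sketch (not part of the patch): how a test built on
// BaseTokenStreamTestCase might drive the new checkRandomData() helper,
// together with the Random-seeded MockAnalyzer added in this patch. The test
// name is made up; the random field and RANDOM_MULTIPLIER come from
// LuceneTestCase.
public void testRandomStrings() throws Exception {
  Analyzer a = new MockAnalyzer(random);
  checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER);        // short words
  checkRandomData(random, a, 200 * RANDOM_MULTIPLIER, 8192);   // longer inputs
}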
*/ - public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) { - this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, true); + public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) { + this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false); } - /** - * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) - * MockAnalyzer(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload}). - */ - public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, boolean payload) { - this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false, payload); - } - /** * Create a Whitespace-lowercasing analyzer with no stopwords removal. *

- * Calls {@link #MockAnalyzer(CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean, boolean) - * MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false, true}). + * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean) + * MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false}). */ - public MockAnalyzer() { - this(MockTokenizer.WHITESPACE, true); + public MockAnalyzer(Random random) { + this(random, MockTokenizer.WHITESPACE, true); } @Override public TokenStream tokenStream(String fieldName, Reader reader) { MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase); TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements); - if (payload){ - filt = new SimplePayloadFilter(filt, fieldName); - } + filt = maybePayload(filt, fieldName); return filt; } @@ -105,15 +88,19 @@ public final class MockAnalyzer extends Analyzer { @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - SavedStreams saved = (SavedStreams) getPreviousTokenStream(); + @SuppressWarnings("unchecked") Map map = (Map) getPreviousTokenStream(); + if (map == null) { + map = new HashMap(); + setPreviousTokenStream(map); + } + + SavedStreams saved = map.get(fieldName); if (saved == null) { saved = new SavedStreams(); saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase); saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements); - if (payload){ - saved.filter = new SimplePayloadFilter(saved.filter, fieldName); - } - setPreviousTokenStream(saved); + saved.filter = maybePayload(saved.filter, fieldName); + map.put(fieldName, saved); return saved.filter; } else { saved.tokenizer.reset(reader); @@ -122,6 +109,28 @@ public final class MockAnalyzer extends Analyzer { } } + private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) { + Integer val = previousMappings.get(fieldName); + if (val == null) { + switch(random.nextInt(3)) { + case 0: val = -1; // no payloads + break; + case 1: val = Integer.MAX_VALUE; // variable length payload + break; + case 2: val = random.nextInt(12); // fixed length payload + break; + } + previousMappings.put(fieldName, val); // save it so we are consistent for this field + } + + if (val == -1) + return stream; + else if (val == Integer.MAX_VALUE) + return new MockVariableLengthPayloadFilter(random, stream); + else + return new MockFixedLengthPayloadFilter(random, stream, val); + } + public void setPositionIncrementGap(int positionIncrementGap){ this.positionIncrementGap = positionIncrementGap; } @@ -131,35 +140,3 @@ public final class MockAnalyzer extends Analyzer { return positionIncrementGap; } } - -final class SimplePayloadFilter extends TokenFilter { - String fieldName; - int pos; - final PayloadAttribute payloadAttr; - final CharTermAttribute termAttr; - - public SimplePayloadFilter(TokenStream input, String fieldName) { - super(input); - this.fieldName = fieldName; - pos = 0; - payloadAttr = input.addAttribute(PayloadAttribute.class); - termAttr = input.addAttribute(CharTermAttribute.class); - } - - @Override - public boolean incrementToken() throws IOException { - if (input.incrementToken()) { - payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes())); - pos++; - return true; - } else { - return false; - } - } - - @Override - public void reset() throws IOException { - super.reset(); - pos = 0; - } -} diff --git 
a/lucene/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java new file mode 100644 index 00000000000..af0c3646257 --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java @@ -0,0 +1,49 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.Payload; + +public final class MockFixedLengthPayloadFilter extends TokenFilter { + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final Random random; + private final byte[] bytes; + private final Payload payload; + + public MockFixedLengthPayloadFilter(Random random, TokenStream in, int length) { + super(in); + this.random = random; + this.bytes = new byte[length]; + this.payload = new Payload(bytes); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + random.nextBytes(bytes); + payloadAtt.setPayload(payload); + return true; + } else { + return false; + } + } +} diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java new file mode 100644 index 00000000000..f7b5361df83 --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java @@ -0,0 +1,51 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.Payload; + +public final class MockVariableLengthPayloadFilter extends TokenFilter { + private static final int MAXLENGTH = 129; + + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final Random random; + private final byte[] bytes = new byte[MAXLENGTH]; + private final Payload payload; + + public MockVariableLengthPayloadFilter(Random random, TokenStream in) { + super(in); + this.random = random; + this.payload = new Payload(bytes); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + random.nextBytes(bytes); + payload.setData(bytes, 0, random.nextInt(MAXLENGTH)); + payloadAtt.setPayload(payload); + return true; + } else { + return false; + } + } +} diff --git a/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java index 28bcdff4f7f..ac6b175493b 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java +++ b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Map; +import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -30,6 +31,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; class DocHelper { @@ -218,9 +221,9 @@ class DocHelper { * @param doc * @throws IOException */ - public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException + public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException { - return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc); + return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc); } /** @@ -233,8 +236,8 @@ class DocHelper { * @param doc * @throws IOException */ - public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException { - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException { + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */ TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity)); //writer.setUseCompoundFile(false); writer.addDocument(doc); diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 27962632acc..0712e4104c4 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -42,6 +42,7 @@ public class RandomIndexWriter implements Closeable { private final Random r; int docCount; int flushAt; + private double flushAtFactor = 1.0; private boolean getReaderCalled; // Randomly calls 
Thread.yield so we mixup thread scheduling @@ -67,7 +68,7 @@ public class RandomIndexWriter implements Closeable { /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */ public RandomIndexWriter(Random r, Directory dir) throws IOException { - this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer())); + this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r))); } /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */ @@ -98,12 +99,20 @@ public class RandomIndexWriter implements Closeable { */ public void addDocument(Document doc) throws IOException { w.addDocument(doc); + maybeCommit(); + } + + private void maybeCommit() throws IOException { if (docCount++ == flushAt) { if (LuceneTestCase.VERBOSE) { - System.out.println("RIW.addDocument: now doing a commit"); + System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount); } w.commit(); - flushAt += _TestUtil.nextInt(r, 10, 1000); + flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000)); + if (flushAtFactor < 2e6) { + // gradually but exponentially increase time b/w flushes + flushAtFactor *= 1.05; + } } } @@ -113,13 +122,7 @@ public class RandomIndexWriter implements Closeable { */ public void updateDocument(Term t, Document doc) throws IOException { w.updateDocument(t, doc); - if (docCount++ == flushAt) { - if (LuceneTestCase.VERBOSE) { - System.out.println("RIW.updateDocument: now doing a commit"); - } - w.commit(); - flushAt += _TestUtil.nextInt(r, 10, 1000); - } + maybeCommit(); } public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException { @@ -181,7 +184,7 @@ public class RandomIndexWriter implements Closeable { System.out.println("RIW.getReader: open new reader"); } w.commit(); - return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10)); + return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider()); } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java index a29dbbdd9f5..2f14297b3ab 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java @@ -120,7 +120,14 @@ public class MockRandomCodec extends Codec { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - + // we pull this before the seed intentionally: because its not consumed at runtime + // (the skipInterval is written into postings header) + int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10); + + if (LuceneTestCase.VERBOSE) { + System.out.println("MockRandomCodec: skipInterval=" + skipInterval); + } + final long seed = seedRandom.nextLong(); if (LuceneTestCase.VERBOSE) { @@ -136,12 +143,12 @@ public class MockRandomCodec extends Codec { PostingsWriterBase postingsWriter; if (random.nextBoolean()) { - postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random)); + postingsWriter = new SepPostingsWriterImpl(state, new MockIntStreamFactory(random), skipInterval); } else { if (LuceneTestCase.VERBOSE) { 
System.out.println("MockRandomCodec: writing Standard postings"); } - postingsWriter = new StandardPostingsWriter(state); + postingsWriter = new StandardPostingsWriter(state, skipInterval); } if (random.nextBoolean()) { diff --git a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java index 7ccd225e113..2683acf9598 100644 --- a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java +++ b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java @@ -2,13 +2,14 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Random; +import java.lang.reflect.Method; import junit.framework.Assert; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; @@ -166,7 +167,7 @@ public class QueryUtils { throws IOException { Directory d = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter w = new IndexWriter(d, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < numDeletedDocs; i++) { w.addDocument(new Document()); } @@ -309,7 +310,7 @@ public class QueryUtils { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS final IndexReader previousReader = lastReader[0]; - IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); + IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false); Weight w = q.weight(indexSearcher); Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def()); if (scorer != null) { diff --git a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java index 03c07f45db3..17b62a6f94d 100644 --- a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java +++ b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java @@ -32,7 +32,9 @@ import java.util.Random; import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; import org.apache.lucene.util._TestUtil; /** @@ -68,6 +70,7 @@ public class MockDirectoryWrapper extends Directory { private Set createdFiles; Set openFilesForWrite = new HashSet(); volatile boolean crashed; + private ThrottledIndexOutput throttledOutput; // use this for tracking files for crash. 
// additionally: provides debugging information in case you leave one open @@ -113,6 +116,10 @@ public class MockDirectoryWrapper extends Directory { public void setPreventDoubleWrite(boolean value) { preventDoubleWrite = value; } + + public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) { + this.throttledOutput = throttledOutput; + } @Override public synchronized void sync(Collection names) throws IOException { @@ -347,7 +354,7 @@ public class MockDirectoryWrapper extends Directory { IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name); openFileHandles.put(io, new RuntimeException("unclosed IndexOutput")); openFilesForWrite.add(name); - return io; + return throttledOutput == null ? io : throttledOutput.newFromDelegate(io); } @Override @@ -419,12 +426,30 @@ public class MockDirectoryWrapper extends Directory { throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause); } open = false; - if (checkIndexOnClose && IndexReader.indexExists(this)) { - _TestUtil.checkIndex(this); + if (checkIndexOnClose) { + if (LuceneTestCase.VERBOSE) { + System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex"); + } + if (codecProvider != null) { + if (IndexReader.indexExists(this, codecProvider)) { + _TestUtil.checkIndex(this, codecProvider); + } + } else { + if (IndexReader.indexExists(this)) { + _TestUtil.checkIndex(this); + } + } } delegate.close(); } + private CodecProvider codecProvider; + + // We pass this CodecProvider to checkIndex when dir is closed... + public void setCodecProvider(CodecProvider cp) { + codecProvider = cp; + } + boolean open = true; public synchronized boolean isOpen() { @@ -559,4 +584,5 @@ public class MockDirectoryWrapper extends Directory { maybeYield(); delegate.copy(to, src, dest); } + } diff --git a/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java index 76577166623..a4cd41f05c9 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.InputStream; -import java.io.BufferedInputStream; import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.GZIPInputStream; import java.util.Random; @@ -79,8 +78,7 @@ public class LineFileDocs implements Closeable { size *= 2.8; } - final InputStream in = new BufferedInputStream(is, BUFFER_SIZE); - reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE); + reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE); // Override sizes for currently "known" line files: if (path.equals("europarl.lines.txt.gz")) { @@ -128,7 +126,7 @@ public class LineFileDocs implements Closeable { body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(body); - id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(id); date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java index b56aaf2e4d5..5888a1c008c 100644 --- 
a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java @@ -116,7 +116,7 @@ public abstract class LuceneTestCase extends Assert { * If this is set, it is the only method that should run. */ static final String TEST_METHOD; - + /** Create indexes in this directory, optimally use a subdir, named after the test */ public static final File TEMP_DIR; static { @@ -128,6 +128,9 @@ public abstract class LuceneTestCase extends Assert { TEMP_DIR = new File(s); TEMP_DIR.mkdirs(); } + + /** set of directories we created, in afterclass we try to clean these up */ + static final Set tempDirs = Collections.synchronizedSet(new HashSet()); // by default we randomly pick a different codec for // each test case (non-J4 tests) and each test class (J4 @@ -142,6 +145,8 @@ public abstract class LuceneTestCase extends Assert { public static final String TEST_DIRECTORY = System.getProperty("tests.directory", "random"); /** Get the number of times to run tests */ public static final int TEST_ITER = Integer.parseInt(System.getProperty("tests.iter", "1")); + /** Get the minimum number of times to run tests until a failure happens */ + public static final int TEST_ITER_MIN = Integer.parseInt(System.getProperty("tests.iter.min", Integer.toString(TEST_ITER))); /** Get the random seed for tests */ public static final String TEST_SEED = System.getProperty("tests.seed", "random"); /** whether or not nightly tests should run */ @@ -158,11 +163,11 @@ public abstract class LuceneTestCase extends Assert { * multiply it by the number of iterations */ public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1")); - + private int savedBoolMaxClauseCount; private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; - + /** Used to track if setUp and tearDown are called correctly from subclasses */ private boolean setup; @@ -184,28 +189,28 @@ public abstract class LuceneTestCase extends Assert { private static class UncaughtExceptionEntry { public final Thread thread; public final Throwable exception; - + public UncaughtExceptionEntry(Thread thread, Throwable exception) { this.thread = thread; this.exception = exception; } } private List uncaughtExceptions = Collections.synchronizedList(new ArrayList()); - + // saves default codec: we do this statically as many build indexes in @beforeClass private static String savedDefaultCodec; // default codec: not set when we use a per-field provider. 
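// Illustrative usage note (not part of the patch): combined with the existing
// tests.iter property, the new tests.iter.min lets a flaky test be repeated
// until it fails; TestFoo/testBar are placeholder names:
//
//   ant test -Dtestcase=TestFoo -Dtestmethod=testBar -Dtests.iter=1000 -Dtests.iter.min=1
//
// The runner (see runChild further below) performs up to tests.iter
// iterations, but stops as soon as an iteration fails once at least
// tests.iter.min iterations have run.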
private static Codec codec; // default codec provider private static CodecProvider savedCodecProvider; - + private static Locale locale; private static Locale savedLocale; private static TimeZone timeZone; private static TimeZone savedTimeZone; - + private static Map stores; - + private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"}; private static void swapCodec(Codec c, CodecProvider cp) { @@ -283,7 +288,7 @@ public abstract class LuceneTestCase extends Assert { // randomly picks from core and test codecs static String pickRandomCodec(Random rnd) { - int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + + int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + TEST_CODECS.length); if (idx < CodecProvider.CORE_CODECS.length) { return CodecProvider.CORE_CODECS[idx]; @@ -316,11 +321,12 @@ public abstract class LuceneTestCase extends Assert { /** @deprecated (4.0) until we fix no-fork problems in solr tests */ @Deprecated private static List testClassesRun = new ArrayList(); - + @BeforeClass public static void beforeClassLuceneTestCaseJ4() { staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; random.setSeed(staticSeed); + tempDirs.clear(); stores = Collections.synchronizedMap(new IdentityHashMap()); savedCodecProvider = CodecProvider.getDefault(); if ("randomPerField".equals(TEST_CODEC)) { @@ -341,7 +347,7 @@ public abstract class LuceneTestCase extends Assert { TimeZone.setDefault(timeZone); testsFailed = false; } - + @AfterClass public static void afterClassLuceneTestCaseJ4() { if (! "false".equals(TEST_CLEAN_THREADS)) { @@ -357,12 +363,12 @@ public abstract class LuceneTestCase extends Assert { if ("randomPerField".equals(TEST_CODEC)) { if (cp instanceof RandomCodecProvider) codecDescription = cp.toString(); - else + else codecDescription = "PreFlex"; } else { codecDescription = codec.toString(); } - + if (CodecProvider.getDefault() == savedCodecProvider) removeTestCodecs(codec, CodecProvider.getDefault()); CodecProvider.setDefault(savedCodecProvider); @@ -392,14 +398,14 @@ public abstract class LuceneTestCase extends Assert { stores = null; // if verbose or tests failed, report some information back if (VERBOSE || testsFailed) - System.err.println("NOTE: test params are: codec=" + codecDescription + - ", locale=" + locale + + System.err.println("NOTE: test params are: codec=" + codecDescription + + ", locale=" + locale + ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID())); if (testsFailed) { System.err.println("NOTE: all tests run in this JVM:"); System.err.println(Arrays.toString(testClassesRun.toArray())); - System.err.println("NOTE: " + System.getProperty("os.name") + " " - + System.getProperty("os.version") + " " + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + System.getProperty("os.arch") + "/" + System.getProperty("java.vendor") + " " + System.getProperty("java.version") + " " @@ -409,10 +415,20 @@ public abstract class LuceneTestCase extends Assert { + "free=" + Runtime.getRuntime().freeMemory() + "," + "total=" + Runtime.getRuntime().totalMemory()); } + // clear out any temp directories if we can + if (!testsFailed) { + for (String path : tempDirs) { + try { + _TestUtil.rmDir(new File(path)); + } catch (IOException e) { + e.printStackTrace(); + } + } + } } private static boolean testsFailed; /* true if any tests failed */ - + // This is how we get control when errors occur. 
// Think of this as start/end/success/failed // events. @@ -447,7 +463,7 @@ public abstract class LuceneTestCase extends Assert { LuceneTestCase.this.name = method.getName(); super.starting(method); } - + }; @Before @@ -465,7 +481,7 @@ public abstract class LuceneTestCase extends Assert { savedUncaughtExceptionHandler.uncaughtException(t, e); } }); - + savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); } @@ -497,7 +513,7 @@ public abstract class LuceneTestCase extends Assert { if ("perMethod".equals(TEST_CLEAN_THREADS)) { int rogueThreads = threadCleanup("test method: '" + getName() + "'"); if (rogueThreads > 0) { - System.err.println("RESOURCE LEAK: test method: '" + getName() + System.err.println("RESOURCE LEAK: test method: '" + getName() + "' left " + rogueThreads + " thread(s) running"); // TODO: fail, but print seed for now. if (!testsFailed && uncaughtExceptions.isEmpty()) { @@ -519,18 +535,18 @@ public abstract class LuceneTestCase extends Assert { fail("Some threads threw uncaught exceptions!"); } - // calling assertSaneFieldCaches here isn't as useful as having test - // classes call it directly from the scope where the index readers - // are used, because they could be gc'ed just before this tearDown + // calling assertSaneFieldCaches here isn't as useful as having test + // classes call it directly from the scope where the index readers + // are used, because they could be gc'ed just before this tearDown // method is called. // // But it's better then nothing. // - // If you are testing functionality that you know for a fact - // "violates" FieldCache sanity, then you should either explicitly + // If you are testing functionality that you know for a fact + // "violates" FieldCache sanity, then you should either explicitly // call purgeFieldCache at the end of your test method, or refactor - // your Test class so that the inconsistant FieldCache usages are - // isolated in distinct test methods + // your Test class so that the inconsistant FieldCache usages are + // isolated in distinct test methods assertSaneFieldCaches(getTestLabel()); } finally { @@ -541,14 +557,14 @@ public abstract class LuceneTestCase extends Assert { private final static int THREAD_STOP_GRACE_MSEC = 50; // jvm-wide list of 'rogue threads' we found, so they only get reported once. private final static IdentityHashMap rogueThreads = new IdentityHashMap(); - + static { // just a hack for things like eclipse test-runner threads for (Thread t : Thread.getAllStackTraces().keySet()) { rogueThreads.put(t, true); } } - + /** * Looks for leftover running threads, trying to kill them off, * so they don't fail future tests. 
@@ -559,20 +575,20 @@ public abstract class LuceneTestCase extends Assert { Thread[] stillRunning = new Thread[Thread.activeCount()+1]; int threadCount = 0; int rogueCount = 0; - + if ((threadCount = Thread.enumerate(stillRunning)) > 1) { while (threadCount == stillRunning.length) { // truncated response stillRunning = new Thread[stillRunning.length*2]; threadCount = Thread.enumerate(stillRunning); } - + for (int i = 0; i < threadCount; i++) { Thread t = stillRunning[i]; - - if (t.isAlive() && - !rogueThreads.containsKey(t) && - t != Thread.currentThread() && + + if (t.isAlive() && + !rogueThreads.containsKey(t) && + t != Thread.currentThread() && /* its ok to keep your searcher across test cases */ (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { System.err.println("WARNING: " + context + " left thread running: " + t); @@ -597,7 +613,7 @@ public abstract class LuceneTestCase extends Assert { } return rogueCount; } - + /** * Asserts that FieldCacheSanityChecker does not detect any * problems with FieldCache.DEFAULT. @@ -640,13 +656,13 @@ public abstract class LuceneTestCase extends Assert { } } - + // @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed: @Deprecated static public void assertEquals(double expected, double actual) { assertEquals(null, expected, actual); } - + @Deprecated static public void assertEquals(String message, double expected, double actual) { assertEquals(message, Double.valueOf(expected), Double.valueOf(actual)); @@ -661,18 +677,18 @@ public abstract class LuceneTestCase extends Assert { static public void assertEquals(String message, float expected, float actual) { assertEquals(message, Float.valueOf(expected), Float.valueOf(actual)); } - + // Replacement for Assume jUnit class, so we can add a message with explanation: - + private static final class TestIgnoredException extends RuntimeException { TestIgnoredException(String msg) { super(msg); } - + TestIgnoredException(String msg, Throwable t) { super(msg, t); } - + @Override public String getMessage() { StringBuilder sb = new StringBuilder(super.getMessage()); @@ -680,7 +696,7 @@ public abstract class LuceneTestCase extends Assert { sb.append(" - ").append(getCause()); return sb.toString(); } - + // only this one is called by our code, exception is not used outside this class: @Override public void printStackTrace(PrintStream s) { @@ -692,19 +708,19 @@ public abstract class LuceneTestCase extends Assert { } } } - + public static void assumeTrue(String msg, boolean b) { Assume.assumeNoException(b ? null : new TestIgnoredException(msg)); } - + public static void assumeFalse(String msg, boolean b) { assumeTrue(msg, !b); } - + public static void assumeNoException(String msg, Exception e) { Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e)); } - + public static Set asSet(T... 
args) { return new HashSet(Arrays.asList(args)); } @@ -762,13 +778,15 @@ public abstract class LuceneTestCase extends Assert { c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000)); } if (r.nextBoolean()) { - c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); + c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20))); } if (r.nextBoolean()) { - c.setMergePolicy(new MockRandomMergePolicy(r)); - } else { + c.setMergePolicy(newTieredMergePolicy()); + } else if (r.nextBoolean()) { c.setMergePolicy(newLogMergePolicy()); + } else { + c.setMergePolicy(new MockRandomMergePolicy(r)); } c.setReaderPooling(r.nextBoolean()); @@ -780,6 +798,10 @@ public abstract class LuceneTestCase extends Assert { return newLogMergePolicy(random); } + public static TieredMergePolicy newTieredMergePolicy() { + return newTieredMergePolicy(random); + } + public static LogMergePolicy newLogMergePolicy(Random r) { LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy(); logmp.setUseCompoundFile(r.nextBoolean()); @@ -792,17 +814,22 @@ public abstract class LuceneTestCase extends Assert { return logmp; } - public static LogMergePolicy newInOrderLogMergePolicy() { - LogMergePolicy logmp = newLogMergePolicy(); - logmp.setRequireContiguousMerge(true); - return logmp; - } - - public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) { - LogMergePolicy logmp = newLogMergePolicy(); - logmp.setMergeFactor(mergeFactor); - logmp.setRequireContiguousMerge(true); - return logmp; + public static TieredMergePolicy newTieredMergePolicy(Random r) { + TieredMergePolicy tmp = new TieredMergePolicy(); + if (r.nextInt(3) == 2) { + tmp.setMaxMergeAtOnce(2); + tmp.setMaxMergeAtOnceExplicit(2); + } else { + tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20)); + tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30)); + } + tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0); + tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20)); + tmp.setUseCompoundFile(r.nextBoolean()); + tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8); + return tmp; } public static LogMergePolicy newLogMergePolicy(boolean useCFS) { @@ -837,7 +864,7 @@ public abstract class LuceneTestCase extends Assert { public static MockDirectoryWrapper newDirectory() throws IOException { return newDirectory(random); } - + /** * Returns a new Directory instance, using the specified random. * See {@link #newDirectory()} for more information. @@ -848,7 +875,7 @@ public abstract class LuceneTestCase extends Assert { stores.put(dir, Thread.currentThread().getStackTrace()); return dir; } - + /** * Returns a new Directory instance, with contents copied from the * provided directory. See {@link #newDirectory()} for more @@ -857,23 +884,23 @@ public abstract class LuceneTestCase extends Assert { public static MockDirectoryWrapper newDirectory(Directory d) throws IOException { return newDirectory(random, d); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f) throws IOException { return newFSDirectory(f, null); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. 
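// Illustrative sketch (not part of the patch): a test that specifically wants
// the randomized TieredMergePolicy, instead of leaving the choice to the coin
// flips in newIndexWriterConfig above, can set it explicitly. Assumes the
// usual LuceneTestCase context (random, dir, TEST_VERSION_CURRENT); iwc and
// writer are local names.
IndexWriterConfig iwc = newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random));
iwc.setMergePolicy(newTieredMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, iwc);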
*/ public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException { String fsdirClass = TEST_DIRECTORY; if (fsdirClass.equals("random")) { fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; } - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + Class clazz; try { try { @@ -881,11 +908,11 @@ public abstract class LuceneTestCase extends Assert { } catch (ClassCastException e) { // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf)); @@ -895,7 +922,7 @@ public abstract class LuceneTestCase extends Assert { throw new RuntimeException(e); } } - + /** * Returns a new Directory instance, using the specified random * with contents copied from the provided directory. See @@ -953,44 +980,44 @@ public abstract class LuceneTestCase extends Assert { public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { if (!index.isIndexed()) return new Field(name, value, store, index); - + if (!store.isStored() && random.nextBoolean()) store = Store.YES; // randomly store it - + tv = randomTVSetting(random, tv); - + return new Field(name, value, store, index, tv); } - - static final TermVector tvSettings[] = { - TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, - TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS + + static final TermVector tvSettings[] = { + TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, + TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS }; - + private static TermVector randomTVSetting(Random random, TermVector minimum) { switch(minimum) { case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; - case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS + case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS : TermVector.WITH_POSITIONS_OFFSETS; - case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS + case WITH_POSITIONS: return random.nextBoolean() ? 
TermVector.WITH_POSITIONS : TermVector.WITH_POSITIONS_OFFSETS; default: return TermVector.WITH_POSITIONS_OFFSETS; } } - + /** return a random Locale from the available locales on the system */ public static Locale randomLocale(Random random) { Locale locales[] = Locale.getAvailableLocales(); return locales[random.nextInt(locales.length)]; } - + /** return a random TimeZone from the available timezones on the system */ public static TimeZone randomTimeZone(Random random) { String tzIds[] = TimeZone.getAvailableIDs(); return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]); } - + /** return a Locale object equivalent to its programmatic name */ public static Locale localeForName(String localeName) { String elements[] = localeName.split("\\_"); @@ -1012,7 +1039,7 @@ public abstract class LuceneTestCase extends Assert { "RAMDirectory", FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2] }; - + public static String randomDirectory(Random random) { if (random.nextInt(10) == 0) { return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)]; @@ -1024,20 +1051,21 @@ public abstract class LuceneTestCase extends Assert { private static Directory newFSDirectoryImpl( Class clazz, File file, LockFactory lockFactory) throws IOException { + FSDirectory d = null; try { // Assuming every FSDirectory has a ctor(File), but not all may take a // LockFactory too, so setting it afterwards. Constructor ctor = clazz.getConstructor(File.class); - FSDirectory d = ctor.newInstance(file); - if (lockFactory != null) { - d.setLockFactory(lockFactory); - } - return d; + d = ctor.newInstance(file); } catch (Exception e) { - return FSDirectory.open(file); + d = FSDirectory.open(file); } + if (lockFactory != null) { + d.setLockFactory(lockFactory); + } + return d; } - + static Directory newDirectoryImpl(Random random, String clazzName) { if (clazzName.equals("random")) clazzName = randomDirectory(random); @@ -1050,6 +1078,7 @@ public abstract class LuceneTestCase extends Assert { final File tmpFile = File.createTempFile("test", "tmp", TEMP_DIR); tmpFile.delete(); tmpFile.mkdir(); + tempDirs.add(tmpFile.getAbsolutePath()); return newFSDirectoryImpl(clazz.asSubclass(FSDirectory.class), tmpFile, null); } @@ -1057,18 +1086,31 @@ public abstract class LuceneTestCase extends Assert { return clazz.newInstance(); } catch (Exception e) { throw new RuntimeException(e); - } + } } - + /** create a new searcher over the reader. * This searcher might randomly use threads. */ public static IndexSearcher newSearcher(IndexReader r) throws IOException { + return newSearcher(r, true); + } + + /** create a new searcher over the reader. + * This searcher might randomly use threads. + * if maybeWrap is true, this searcher might wrap the reader + * with one that returns null for getSequentialSubReaders. + */ + public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException { if (random.nextBoolean()) { - return new IndexSearcher(r); + if (maybeWrap && random.nextBoolean()) { + return new IndexSearcher(new SlowMultiReaderWrapper(r)); + } else { + return new IndexSearcher(r); + } } else { int threads = 0; - final ExecutorService ex = (random.nextBoolean()) ? null - : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + final ExecutorService ex = (random.nextBoolean()) ? 
null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), new NamedThreadFactory("LuceneTestCase")); if (ex != null && VERBOSE) { System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); @@ -1093,12 +1135,12 @@ public abstract class LuceneTestCase extends Assert { public String getName() { return this.name; } - + /** Gets a resource from the classpath as {@link File}. This method should only be used, * if a real file is needed. To get a stream, code should prefer * {@link Class#getResourceAsStream} using {@code this.getClass()}. */ - + protected File getDataFile(String name) throws IOException { try { return new File(this.getClass().getResource(name).toURI()); @@ -1109,11 +1151,11 @@ public abstract class LuceneTestCase extends Assert { // We get here from InterceptTestCaseEvents on the 'failed' event.... public void reportAdditionalFailureInfo() { - System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed) + reproduceWithExtraParams()); } - + // extra params that were overridden needed to reproduce the command private String reproduceWithExtraParams() { StringBuilder sb = new StringBuilder(); @@ -1129,12 +1171,12 @@ public abstract class LuceneTestCase extends Assert { private static long staticSeed; // seed for individual test methods, changed in @before private long seed; - + private static final Random seedRand = new Random(); protected static final Random random = new Random(0); private String name = ""; - + /** * Annotation for tests that should only be run during nightly builds. 
*/ @@ -1142,7 +1184,7 @@ public abstract class LuceneTestCase extends Assert { @Inherited @Retention(RetentionPolicy.RUNTIME) public @interface Nightly {} - + /** optionally filters the tests to be run by TEST_METHOD */ public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner { private List testMethods; @@ -1172,11 +1214,11 @@ public abstract class LuceneTestCase extends Assert { testMethods.add(new FrameworkMethod(m)); } } - + if (testMethods.isEmpty()) { throw new RuntimeException("No runnable methods!"); } - + if (TEST_NIGHTLY == false) { if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) { /* the test class is annotated with nightly, remove all methods */ @@ -1208,11 +1250,22 @@ public abstract class LuceneTestCase extends Assert { if (VERBOSE) { System.out.println("\nNOTE: running test " + arg0.getName()); } + + // only print iteration info if the user requested more than one iterations + boolean verbose = VERBOSE && TEST_ITER > 1; for (int i = 0; i < TEST_ITER; i++) { - if (VERBOSE && TEST_ITER > 1) { + if (verbose) { System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER); } super.runChild(arg0, arg1); + if (testsFailed) { + if (i >= TEST_ITER_MIN - 1) { + if (verbose) { + System.out.println("\nNOTE: iteration " + i + " failed !"); + } + break; + } + } } } @@ -1226,9 +1279,9 @@ public abstract class LuceneTestCase extends Assert { @Override public boolean shouldRun(Description d) { return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD); - } + } }; - + try { f.apply(this); } catch (NoTestsRemainException e) { @@ -1236,12 +1289,12 @@ public abstract class LuceneTestCase extends Assert { } } } - + private static class RandomCodecProvider extends CodecProvider { private List knownCodecs = new ArrayList(); private Map previousMappings = new HashMap(); private final int perFieldSeed; - + RandomCodecProvider(Random random) { this.perFieldSeed = random.nextInt(); register(new StandardCodec()); @@ -1273,13 +1326,13 @@ public abstract class LuceneTestCase extends Assert { } return codec.name; } - + @Override public synchronized String toString() { return "RandomCodecProvider: " + previousMappings.toString(); } } - + @Ignore("just a hack") public final void alwaysIgnoredTestMethod() {} } diff --git a/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java b/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java new file mode 100644 index 00000000000..52333bd2cd7 --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java @@ -0,0 +1,147 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexOutput; + +public class ThrottledIndexOutput extends IndexOutput { + public static final int DEFAULT_MIN_WRITTEN_BYTES = 1024; + private final int bytesPerSecond; + private IndexOutput delegate; + private long flushDelayMillis; + private long closeDelayMillis; + private long seekDelayMillis; + private long pendingBytes; + private long minBytesWritten; + private long timeElapsed; + private final byte[] bytes = new byte[1]; + + public ThrottledIndexOutput newFromDelegate(IndexOutput output) { + return new ThrottledIndexOutput(bytesPerSecond, flushDelayMillis, + closeDelayMillis, seekDelayMillis, minBytesWritten, output); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delayInMillis, + IndexOutput delegate) { + this(bytesPerSecond, delayInMillis, delayInMillis, delayInMillis, + DEFAULT_MIN_WRITTEN_BYTES, delegate); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delays, + int minBytesWritten, IndexOutput delegate) { + this(bytesPerSecond, delays, delays, delays, minBytesWritten, delegate); + } + + public static final int mBitsToBytes(int mbits) { + return mbits * 125000; + } + + public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis, + long closeDelayMillis, long seekDelayMillis, long minBytesWritten, + IndexOutput delegate) { + assert bytesPerSecond > 0; + this.delegate = delegate; + this.bytesPerSecond = bytesPerSecond; + this.flushDelayMillis = flushDelayMillis; + this.closeDelayMillis = closeDelayMillis; + this.seekDelayMillis = seekDelayMillis; + this.minBytesWritten = minBytesWritten; + } + + @Override + public void flush() throws IOException { + sleep(flushDelayMillis); + delegate.flush(); + } + + @Override + public void close() throws IOException { + sleep(closeDelayMillis + getDelay(true)); + delegate.close(); + + } + + @Override + public long getFilePointer() { + return delegate.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + sleep(seekDelayMillis); + delegate.seek(pos); + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void writeByte(byte b) throws IOException { + bytes[0] = b; + writeBytes(bytes, 0, 1); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + final long before = System.nanoTime(); + delegate.writeBytes(b, offset, length); + timeElapsed += System.nanoTime() - before; + pendingBytes += length; + sleep(getDelay(false)); + + } + + protected long getDelay(boolean closing) { + if (pendingBytes > 0 && (closing || pendingBytes > minBytesWritten)) { + long actualBps = (timeElapsed / pendingBytes) * 1000000000l; // nano to sec + if (actualBps > bytesPerSecond) { + long expected = (pendingBytes * 1000l / bytesPerSecond) ; + final long delay = expected - (timeElapsed / 1000000l) ; + pendingBytes = 0; + timeElapsed = 0; + return delay; + } + } + return 0; + + } + + private static final void sleep(long ms) { + if (ms <= 0) + return; + try { + Thread.sleep(ms); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public void copyBytes(DataInput input, long numBytes) throws IOException { + delegate.copyBytes(input, numBytes); + } +} diff --git a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java 
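Editor's note (not part of the patch): a hedged usage sketch for the new ThrottledIndexOutput above, assuming the pre-IOContext Directory.createOutput(String) signature of this branch; the file name, throughput, and delay values are arbitrary.

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ThrottledIndexOutput;

// Illustrative sketch: wrap a plain IndexOutput in ThrottledIndexOutput to
// simulate a slow device (roughly 8 Mbit/s with 5 ms flush/close/seek delays).
public class ThrottledOutputExample {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexOutput raw = dir.createOutput("throttled.bin");
    IndexOutput out = new ThrottledIndexOutput(
        ThrottledIndexOutput.mBitsToBytes(8), 5, raw);
    byte[] block = new byte[4096];
    for (int i = 0; i < 256; i++) { // ~1 MB total, every write goes through the throttle wrapper
      out.writeBytes(block, 0, block.length);
    }
    out.close();
    dir.close();
  }
}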
b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java index 456374b350b..a2003215737 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -34,8 +34,6 @@ import java.util.HashMap; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import org.junit.Assert; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.CheckIndex; @@ -43,17 +41,22 @@ import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; +import org.junit.Assert; public class _TestUtil { /** Returns temp dir, containing String arg in its name; * does not create the directory. */ public static File getTempDir(String desc) { - return new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong()); + File f = new File(LuceneTestCase.TEMP_DIR, desc + "." + new Random().nextLong()); + LuceneTestCase.tempDirs.add(f.getAbsolutePath()); + return f; } /** @@ -88,6 +91,7 @@ public class _TestUtil { rmDir(destDir); destDir.mkdir(); + LuceneTestCase.tempDirs.add(destDir.getAbsolutePath()); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); @@ -157,6 +161,19 @@ public class _TestUtil { return start + r.nextInt(end-start+1); } + public static String randomSimpleString(Random r) { + final int end = r.nextInt(10); + if (end == 0) { + // allow 0 length + return ""; + } + final char[] buffer = new char[end]; + for (int i = 0; i < end; i++) { + buffer[i] = (char) _TestUtil.nextInt(r, 97, 102); + } + return new String(buffer, 0, end); + } + /** Returns random string, including full unicode range. */ public static String randomUnicodeString(Random r) { return randomUnicodeString(r, 20); @@ -172,22 +189,35 @@ public class _TestUtil { return ""; } final char[] buffer = new char[end]; - for (int i = 0; i < end; i++) { - int t = r.nextInt(5); + randomFixedLengthUnicodeString(r, buffer, 0, buffer.length); + return new String(buffer, 0, end); + } - if (0 == t && i < end - 1) { + /** + * Fills provided char[] with valid random unicode code + * unit sequence. 
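Editor's note (not part of the patch): getTempDir() and unzip() now record every created path in LuceneTestCase.tempDirs. The cleanup side of that registry is not shown in this hunk, so the following is only an assumed sketch of why such bookkeeping is useful: a single after-class sweep can delete whatever the tests created. All names below are illustrative and are not the actual LuceneTestCase code.

import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

// Illustrative registry: record temp dirs as they are handed out, wipe them all once at the end.
public class TempDirRegistry {
  static final Set<String> tempDirs = Collections.synchronizedSet(new HashSet<String>());

  public static File register(File dir) {
    tempDirs.add(dir.getAbsolutePath());
    return dir;
  }

  /** Call once after all tests of a class have run. */
  public static void cleanup() {
    for (String path : tempDirs) {
      rmDir(new File(path));
    }
    tempDirs.clear();
  }

  private static void rmDir(File dir) {
    File[] children = dir.listFiles();
    if (children != null) {
      for (File child : children) {
        rmDir(child);
      }
    }
    dir.delete();
  }
}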
+ */ + public static void randomFixedLengthUnicodeString(Random random, char[] chars, int offset, int length) { + int i = offset; + final int end = offset + length; + while(i < end) { + final int t = random.nextInt(5); + if (0 == t && i < length - 1) { // Make a surrogate pair // High surrogate - buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff); + chars[i++] = (char) nextInt(random, 0xd800, 0xdbff); // Low surrogate - buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff); + chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff); + } else if (t <= 1) { + chars[i++] = (char) random.nextInt(0x80); + } else if (2 == t) { + chars[i++] = (char) nextInt(random, 0x80, 0x800); + } else if (3 == t) { + chars[i++] = (char) nextInt(random, 0x800, 0xd7ff); + } else if (4 == t) { + chars[i++] = (char) nextInt(random, 0xe000, 0xffff); } - else if (t <= 1) buffer[i] = (char) r.nextInt(0x80); - else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); - else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); - else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); } - return new String(buffer, 0, end); } private static final int[] blockStarts = { @@ -325,9 +355,14 @@ public class _TestUtil { * count lowish */ public static void reduceOpenFiles(IndexWriter w) { // keep number of open files lowish - LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); - lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); - + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogMergePolicy) { + LogMergePolicy lmp = (LogMergePolicy) mp; + lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); + } else if (mp instanceof TieredMergePolicy) { + TieredMergePolicy tmp = (TieredMergePolicy) mp; + tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce())); + } MergeScheduler ms = w.getConfig().getMergeScheduler(); if (ms instanceof ConcurrentMergeScheduler) { ((ConcurrentMergeScheduler) ms).setMaxThreadCount(2); diff --git a/lucene/src/test-framework/overview.html b/lucene/src/test-framework/overview.html index d5c0077861c..608eb070772 100644 --- a/lucene/src/test-framework/overview.html +++ b/lucene/src/test-framework/overview.html @@ -1,28 +1,28 @@ - - - - Apache Lucene Test Framework API - - -
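Editor's note (not part of the patch): randomFixedLengthUnicodeString() above is careful to emit a low surrogate immediately after every high surrogate, so the filled buffer is always well-formed UTF-16. A small self-check sketch of that property; the iteration count and buffer size are arbitrary.

import java.util.Random;

import org.apache.lucene.util._TestUtil;

// Illustrative self-check: every high surrogate produced by the generator must
// be immediately followed by a low surrogate, and no low surrogate may appear alone.
public class RandomUnicodeCheck {
  public static void main(String[] args) {
    Random random = new Random();
    char[] chars = new char[128];
    for (int iter = 0; iter < 1000; iter++) {
      _TestUtil.randomFixedLengthUnicodeString(random, chars, 0, chars.length);
      for (int i = 0; i < chars.length; i++) {
        if (Character.isHighSurrogate(chars[i])) {
          if (i + 1 >= chars.length || !Character.isLowSurrogate(chars[i + 1])) {
            throw new AssertionError("unpaired high surrogate at " + i);
          }
          i++; // skip the low surrogate we just checked
        } else if (Character.isLowSurrogate(chars[i])) {
          throw new AssertionError("orphan low surrogate at " + i);
        }
      }
    }
    System.out.println("all generated buffers are well-formed UTF-16");
  }
}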

- The Lucene Test Framework is used by Lucene as the basis for its tests. - The framework can also be used for testing third-party code that uses - the Lucene API. -

- - + + + + Apache Lucene Test Framework API + + +

+ The Lucene Test Framework is used by Lucene as the basis for its tests. + The framework can also be used for testing third-party code that uses + the Lucene API. +

+ + diff --git a/lucene/src/test/org/apache/lucene/TestDemo.java b/lucene/src/test/org/apache/lucene/TestDemo.java index 60eba543ece..33884dd4627 100644 --- a/lucene/src/test/org/apache/lucene/TestDemo.java +++ b/lucene/src/test/org/apache/lucene/TestDemo.java @@ -43,13 +43,13 @@ import org.apache.lucene.util.LuceneTestCase; public class TestDemo extends LuceneTestCase { public void testDemo() throws IOException, ParseException { - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); // Store the index in memory: Directory directory = newDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); - RandomIndexWriter iwriter = new RandomIndexWriter(random, directory); + RandomIndexWriter iwriter = new RandomIndexWriter(random, directory, analyzer); iwriter.w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java index fe8f2ad81ff..b8ede7f4111 100644 --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java +++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java @@ -26,8 +26,6 @@ import org.apache.lucene.analysis.*; import org.apache.lucene.index.codecs.*; import org.apache.lucene.index.codecs.standard.*; import org.apache.lucene.index.codecs.pulsing.*; -import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.store.*; import java.util.*; import java.io.*; @@ -174,13 +172,6 @@ public class TestExternalCodecs extends LuceneTestCase { public void close() { // TODO: finalize stuff } - - @Override - public DocValuesConsumer addValuesField(FieldInfo field) - throws IOException { - //TODO(simonw): can we fix this easily? - throw new UnsupportedOperationException("no implemented"); - } } private static class RAMTermsConsumer extends TermsConsumer { @@ -280,11 +271,6 @@ public class TestExternalCodecs extends LuceneTestCase { public TermsEnum terms() { return new RAMTermsEnum(postings.fieldToTerms.get(current)); } - - @Override - public DocValues docValues() throws IOException { - throw new UnsupportedOperationException("not implemented"); - } } static class RAMTermsEnum extends TermsEnum { @@ -498,7 +484,9 @@ public class TestExternalCodecs extends LuceneTestCase { public FieldsProducer fieldsProducer(SegmentReadState readState) throws IOException { - return state.get(readState.segmentInfo.name); + synchronized(state) { + return state.get(readState.segmentInfo.name); + } } @Override @@ -523,7 +511,7 @@ public class TestExternalCodecs extends LuceneTestCase { dir.setCheckIndexOnClose(false); // we use a custom codec provider IndexWriter w = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, true)). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setCodecProvider(provider). 
setMergePolicy(newLogMergePolicy(3)) ); diff --git a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java index cd6ebb358f0..6d61cc0a15b 100644 --- a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java +++ b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java @@ -90,7 +90,7 @@ public class TestMergeSchedulerExternal extends LuceneTestCase { doc.add(idField); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new MyMergeScheduler()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new MyMergeScheduler()) .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setMergePolicy(newLogMergePolicy())); LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy(); diff --git a/lucene/src/test/org/apache/lucene/TestSearch.java b/lucene/src/test/org/apache/lucene/TestSearch.java index 619a60485a6..a79b5c2c7a5 100644 --- a/lucene/src/test/org/apache/lucene/TestSearch.java +++ b/lucene/src/test/org/apache/lucene/TestSearch.java @@ -72,7 +72,7 @@ public class TestSearch extends LuceneTestCase { private void doTestSearch(Random random, PrintWriter out, boolean useCompoundFile) throws Exception { Directory directory = newDirectory(); - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); MergePolicy mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { diff --git a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java index aec32f66285..7dbec0d73a5 100644 --- a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java +++ b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java @@ -78,7 +78,7 @@ public class TestSearchForDuplicates extends LuceneTestCase { private void doTest(Random random, PrintWriter out, boolean useCompoundFiles) throws Exception { Directory directory = newDirectory(); - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); final MergePolicy mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { diff --git a/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java index 387b2ee0f7c..e5ec6fad862 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -1,5 +1,6 @@ package org.apache.lucene.analysis; +import java.io.StringReader; import java.util.Arrays; import org.apache.lucene.util.automaton.Automaton; @@ -29,7 +30,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like WhitespaceAnalyzer */ public void testWhitespace() throws Exception { - Analyzer a = new MockAnalyzer(); + Analyzer a = new MockAnalyzer(random); assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ", new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); assertAnalyzesToReuse(a, "aba cadaba shazam", @@ -40,7 +41,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like SimpleAnalyzer */ public void testSimple() throws Exception { - Analyzer a = new 
MockAnalyzer(MockTokenizer.SIMPLE, true); + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); assertAnalyzesToReuse(a, "aba4cadaba-Shazam", @@ -51,7 +52,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like KeywordAnalyzer */ public void testKeyword() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.KEYWORD, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false); assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " }); assertAnalyzesToReuse(a, "aba4cadaba-Shazam", @@ -62,13 +63,13 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like StopAnalyzer */ public void testStop() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); assertAnalyzesTo(a, "the quick brown a fox", new String[] { "quick", "brown", "fox" }, new int[] { 2, 1, 2 }); // disable positions - a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false); assertAnalyzesTo(a, "the quick brown a fox", new String[] { "quick", "brown", "fox" }, new int[] { 1, 1, 1 }); @@ -81,7 +82,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { BasicOperations.complement( Automaton.union( Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar"))))); - Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, keepWords, true); + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, keepWords, true); assertAnalyzesTo(a, "quick foo brown bar bar fox foo", new String[] { "foo", "bar", "bar", "foo" }, new int[] { 2, 2, 1, 2 }); @@ -90,9 +91,28 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like LengthFilter */ public void testLength() throws Exception { CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton()); - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true, length5, true); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, true, length5, true); assertAnalyzesTo(a, "ok toolong fine notfine", new String[] { "ok", "fine" }, new int[] { 1, 2 }); } + + public void testLUCENE_3042() throws Exception { + String testString = "t"; + + Analyzer analyzer = new MockAnalyzer(random); + TokenStream stream = analyzer.reusableTokenStream("dummy", new StringReader(testString)); + stream.reset(); + while (stream.incrementToken()) { + // consume + } + stream.end(); + + assertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new MockAnalyzer(random), 10000*RANDOM_MULTIPLIER); + } } diff --git a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java index cca648de93d..25cf0c4d987 100644 --- a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java +++ b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java 
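Editor's note (not part of the patch): the recurring edit in the test files above is that MockAnalyzer now takes the test's Random, so its internal choices are tied to the reproducible seed that gets printed on failure. A minimal sketch of a test written against the new constructors; the field name and text are illustrative.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

// Illustrative sketch: seeded MockAnalyzer plus RandomIndexWriter, mirroring
// the pattern the patch applies throughout the test suite.
public class MockAnalyzerSeedExample extends LuceneTestCase {
  public void testSeededAnalyzer() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, true);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, analyzer);
    Document doc = new Document();
    doc.add(new Field("body", "seeded analyzers keep failures reproducible",
        Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    dir.close();
  }
}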
@@ -19,11 +19,18 @@ package org.apache.lucene.index; import org.apache.lucene.util.*; import org.apache.lucene.store.*; +import org.apache.lucene.search.*; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.document.*; import org.apache.lucene.index.codecs.CodecProvider; +import java.io.File; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; import org.junit.Ignore; // NOTE: this test will fail w/ PreFlexRW codec! (Because @@ -36,7 +43,7 @@ import org.junit.Ignore; // // ant compile-test // -// java -server -Xmx2g -Xms2g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=SimpleFSDirectory -Dtests.codec=Standard -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms +// java -server -Xmx8g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms // public class Test2BTerms extends LuceneTestCase { @@ -45,17 +52,21 @@ public class Test2BTerms extends LuceneTestCase { private final static BytesRef bytes = new BytesRef(TOKEN_LEN); - private static final class MyTokenStream extends TokenStream { + private final static class MyTokenStream extends TokenStream { private final int tokensPerDoc; private int tokenCount; - private int byteUpto; + public final List savedTerms = new ArrayList(); + private int nextSave; + private final Random random; - public MyTokenStream(int tokensPerDoc) { + public MyTokenStream(Random random, int tokensPerDoc) { super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); this.tokensPerDoc = tokensPerDoc; addAttribute(TermToBytesRefAttribute.class); bytes.length = TOKEN_LEN; + this.random = random; + nextSave = _TestUtil.nextInt(random, 500000, 1000000); } @Override @@ -65,6 +76,11 @@ public class Test2BTerms extends LuceneTestCase { } random.nextBytes(bytes.bytes); tokenCount++; + if (--nextSave == 0) { + savedTerms.add(new BytesRef(bytes)); + System.out.println("TEST: save term=" + bytes); + nextSave = _TestUtil.nextInt(random, 500000, 1000000); + } return true; } @@ -131,47 +147,122 @@ public class Test2BTerms extends LuceneTestCase { throw new RuntimeException("thist test cannot run with PreFlex codec"); } - long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000; + final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000; - int TERMS_PER_DOC = 1000000; + final int TERMS_PER_DOC = _TestUtil.nextInt(random, 100000, 1000000); + + List savedTerms = null; Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms")); - IndexWriter w = new IndexWriter( - dir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). - setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH). - setRAMBufferSizeMB(256.0). - setMergeScheduler(new ConcurrentMergeScheduler()). 
- setMergePolicy(newLogMergePolicy(false, 10)) - ); + //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex")); - MergePolicy mp = w.getConfig().getMergePolicy(); - if (mp instanceof LogByteSizeMergePolicy) { - // 1 petabyte: - ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024); + if (true) { + + IndexWriter w = new IndexWriter(dir, + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) + .setRAMBufferSizeMB(256.0) + .setMergeScheduler(new ConcurrentMergeScheduler()) + .setMergePolicy(newLogMergePolicy(false, 10)) + .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); + + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogByteSizeMergePolicy) { + // 1 petabyte: + ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024); + } + + Document doc = new Document(); + final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC); + Field field = new Field("field", ts); + field.setOmitTermFreqAndPositions(true); + field.setOmitNorms(true); + doc.add(field); + //w.setInfoStream(System.out); + final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC); + + System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC); + System.out.println("numDocs=" + numDocs); + + for(int i=0;i bigOrdTerms = new ArrayList(savedTerms.subList(numSavedTerms-10, numSavedTerms)); + System.out.println("TEST: test big ord terms..."); + testSavedTerms(r, bigOrdTerms); + System.out.println("TEST: test all saved terms..."); + testSavedTerms(r, savedTerms); + r.close(); - System.out.println("now CheckIndex..."); + System.out.println("TEST: now CheckIndex..."); CheckIndex.Status status = _TestUtil.checkIndex(dir); final long tc = status.segmentInfos.get(0).termIndexStatus.termCount; assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE); dir.close(); + System.out.println("TEST: done!"); + } + + private List findTerms(IndexReader r) throws IOException { + System.out.println("TEST: findTerms"); + final TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator(); + final List savedTerms = new ArrayList(); + int nextSave = _TestUtil.nextInt(random, 500000, 1000000); + BytesRef term; + while((term = termsEnum.next()) != null) { + if (--nextSave == 0) { + savedTerms.add(new BytesRef(term)); + System.out.println("TEST: add " + term); + nextSave = _TestUtil.nextInt(random, 500000, 1000000); + } + } + return savedTerms; + } + + private void testSavedTerms(IndexReader r, List terms) throws IOException { + System.out.println("TEST: run " + terms.size() + " terms on reader=" + r); + IndexSearcher s = new IndexSearcher(r); + Collections.shuffle(terms); + TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator(); + boolean failed = false; + for(int iter=0;iter<10*terms.size();iter++) { + final BytesRef term = terms.get(random.nextInt(terms.size())); + System.out.println("TEST: search " + term); + final long t0 = System.currentTimeMillis(); + final int count = s.search(new TermQuery(new Term("field", term)), 1).totalHits; + if (count <= 0) { + System.out.println(" FAILED: count=" + count); + failed = true; + } + final long t1 = System.currentTimeMillis(); + System.out.println(" took " + (t1-t0) + " millis"); + + TermsEnum.SeekStatus result = termsEnum.seek(term); + if (result != TermsEnum.SeekStatus.FOUND) { + if (result == TermsEnum.SeekStatus.END) { + System.out.println(" FAILED: got END"); + } else { + System.out.println(" FAILED: wrong term: got " + termsEnum.term()); + } + failed = 
true; + } + } + assertFalse(failed); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index a0055751e43..097d9c9944d 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -53,7 +53,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = null; writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE)); writer.setInfoStream(VERBOSE ? System.out : null); // add 100 documents @@ -64,7 +64,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMergePolicy(newLogMergePolicy(false)) ); @@ -73,14 +73,14 @@ public class TestAddIndexes extends LuceneTestCase { assertEquals(40, writer.maxDoc()); writer.close(); - writer = newWriter(aux2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = newWriter(aux2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); // add 40 documents in compound files addDocs2(writer, 50); assertEquals(50, writer.maxDoc()); writer.close(); // test doc count before segments are merged - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); assertEquals(100, writer.maxDoc()); writer.addIndexes(aux, aux2); assertEquals(190, writer.maxDoc()); @@ -95,14 +95,14 @@ public class TestAddIndexes extends LuceneTestCase { // now add another set in. Directory aux3 = newDirectory(); - writer = newWriter(aux3, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = newWriter(aux3, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 40 documents addDocs(writer, 40); assertEquals(40, writer.maxDoc()); writer.close(); // test doc count before segments are merged/index is optimized - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); assertEquals(190, writer.maxDoc()); writer.addIndexes(aux3); assertEquals(230, writer.maxDoc()); @@ -116,7 +116,7 @@ public class TestAddIndexes extends LuceneTestCase { verifyTermDocs(dir, new Term("content", "bbb"), 50); // now optimize it. 
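Editor's note (not part of the patch): the reworked Test2BTerms above saves every ~500k-th randomly generated term while indexing and later asserts that each saved term is still searchable. The same verify-a-sample idea at toy scale, as a hedged sketch using only APIs that appear elsewhere in this patch; sizes and field names are arbitrary.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

// Illustrative sketch: keep every 10th indexed value and verify each one is
// findable with a TermQuery afterwards.
public class SampledTermsExample extends LuceneTestCase {
  public void testSampledTermsAreSearchable() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random));
    List<String> saved = new ArrayList<String>();
    for (int i = 0; i < 200; i++) {
      String value = _TestUtil.randomSimpleString(random) + i; // numeric suffix keeps values unique
      if (i % 10 == 0) {
        saved.add(value);
      }
      Document doc = new Document();
      doc.add(new Field("field", value, Field.Store.NO, Field.Index.NOT_ANALYZED));
      writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = new IndexSearcher(reader);
    for (String term : saved) {
      assertEquals(1, searcher.search(new TermQuery(new Term("field", term)), 1).totalHits);
    }
    searcher.close();
    reader.close();
    writer.close();
    dir.close();
  }
}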
- writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -129,11 +129,11 @@ public class TestAddIndexes extends LuceneTestCase { // now add a single document Directory aux4 = newDirectory(); - writer = newWriter(aux4, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = newWriter(aux4, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocs2(writer, 1); writer.close(); - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); assertEquals(230, writer.maxDoc()); writer.addIndexes(aux4); assertEquals(231, writer.maxDoc()); @@ -156,7 +156,7 @@ public class TestAddIndexes extends LuceneTestCase { Directory aux = newDirectory(); setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.setInfoStream(VERBOSE ? System.out : null); writer.addIndexes(aux); @@ -194,7 +194,7 @@ public class TestAddIndexes extends LuceneTestCase { Directory aux = newDirectory(); setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); // Adds 10 docs, then replaces them with another 10 // docs, so 10 pending deletes: @@ -232,7 +232,7 @@ public class TestAddIndexes extends LuceneTestCase { Directory aux = newDirectory(); setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); // Adds 10 docs, then replaces them with another 10 // docs, so 10 pending deletes: @@ -273,7 +273,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = null; - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 100 documents addDocs(writer, 100); assertEquals(100, writer.maxDoc()); @@ -281,7 +281,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(1000). setMergePolicy(newLogMergePolicy(false)) @@ -291,7 +291,7 @@ public class TestAddIndexes extends LuceneTestCase { writer.close(); writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(1000). 
setMergePolicy(newLogMergePolicy(false)) @@ -299,7 +299,7 @@ public class TestAddIndexes extends LuceneTestCase { addDocs(writer, 100); writer.close(); - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); try { // cannot add self writer.addIndexes(aux, dir); @@ -329,7 +329,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(10). setMergePolicy(newLogMergePolicy(4)) @@ -358,7 +358,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(9). setMergePolicy(newLogMergePolicy(4)) @@ -387,7 +387,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(10). setMergePolicy(newLogMergePolicy(4)) @@ -422,7 +422,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(4). setMergePolicy(newLogMergePolicy(4)) @@ -448,7 +448,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = newWriter( aux2, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(100). setMergePolicy(newLogMergePolicy(10)) @@ -475,7 +475,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(6). setMergePolicy(newLogMergePolicy(4)) @@ -536,7 +536,7 @@ public class TestAddIndexes extends LuceneTestCase { private void setUpDirs(Directory dir, Directory aux) throws IOException { IndexWriter writer = null; - writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); // add 1000 documents in 1 segment addDocs(writer, 1000); assertEquals(1000, writer.maxDoc()); @@ -545,7 +545,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(1000). 
setMergePolicy(newLogMergePolicy(false, 10)) @@ -556,7 +556,7 @@ public class TestAddIndexes extends LuceneTestCase { writer.close(); writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(1000). setMergePolicy(newLogMergePolicy(false, 10)) @@ -575,7 +575,7 @@ public class TestAddIndexes extends LuceneTestCase { lmp.setUseCompoundFile(false); lmp.setMergeFactor(100); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(5).setMergePolicy(lmp)); Document doc = new Document(); @@ -603,7 +603,7 @@ public class TestAddIndexes extends LuceneTestCase { lmp.setUseCompoundFile(false); lmp.setMergeFactor(4); writer = new IndexWriter(dir2, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(lmp)); writer.addIndexes(dir); writer.close(); @@ -636,14 +636,14 @@ public class TestAddIndexes extends LuceneTestCase { NUM_COPY = numCopy; dir = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); for (int i = 0; i < NUM_INIT_DOCS; i++) addDoc(writer); writer.close(); dir2 = newDirectory(); - writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer2.setInfoStream(VERBOSE ? System.out : null); writer2.commit(); @@ -771,11 +771,11 @@ public class TestAddIndexes extends LuceneTestCase { c.joinThreads(); int expectedNumDocs = 100+NUM_COPY*(4*NUM_ITER/5)*RunAddIndexesThreads.NUM_THREADS*RunAddIndexesThreads.NUM_INIT_DOCS; - assertEquals(expectedNumDocs, c.writer2.numDocs()); + assertEquals("expected num docs don't match - failures: " + c.failures, expectedNumDocs, c.writer2.numDocs()); c.close(true); - assertTrue(c.failures.size() == 0); + assertTrue("found unexpected failures: " + c.failures, c.failures.isEmpty()); _TestUtil.checkIndex(c.dir2); @@ -938,6 +938,40 @@ public class TestAddIndexes extends LuceneTestCase { assertTrue(c.failures.size() == 0); } + + // LUCENE-2996: tests that addIndexes(IndexReader) applies existing deletes correctly. 
+ public void testExistingDeletes() throws Exception { + Directory[] dirs = new Directory[2]; + for (int i = 0; i < dirs.length; i++) { + dirs[i] = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + IndexWriter writer = new IndexWriter(dirs[i], conf); + Document doc = new Document(); + doc.add(new Field("id", "myid", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + writer.addDocument(doc); + writer.close(); + } + + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + IndexWriter writer = new IndexWriter(dirs[0], conf); + + // Now delete the document + writer.deleteDocuments(new Term("id", "myid")); + IndexReader r = IndexReader.open(dirs[1]); + try { + writer.addIndexes(r); + } finally { + r.close(); + } + writer.commit(); + assertEquals("Documents from the incoming index should not have been deleted", 1, writer.numDocs()); + writer.close(); + + for (Directory dir : dirs) { + dir.close(); + } + + } private void addDocs3(IndexWriter writer, int numDocs) throws IOException { for (int i = 0; i < numDocs; i++) { @@ -958,7 +992,7 @@ public class TestAddIndexes extends LuceneTestCase { IndexWriter writer = null; writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider( + new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setCodecProvider( provider)); // add 100 documents addDocs3(writer, 100); @@ -969,7 +1003,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( aux, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setCodecProvider(provider). setMaxBufferedDocs(10). @@ -983,7 +1017,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter( aux2, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setCodecProvider(provider) ); @@ -996,7 +1030,7 @@ public class TestAddIndexes extends LuceneTestCase { // test doc count before segments are merged writer = newWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). 
setCodecProvider(provider) ); @@ -1029,7 +1063,7 @@ public class TestAddIndexes extends LuceneTestCase { Directory[] dirs = new Directory[2]; for (int i = 0; i < dirs.length; i++) { dirs[i] = new RAMDirectory(); - IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES)); w.addDocument(d); @@ -1039,8 +1073,9 @@ public class TestAddIndexes extends LuceneTestCase { IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) }; Directory dir = new RAMDirectory(); - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); + lmp.setUseCompoundFile(true); lmp.setNoCFSRatio(1.0); // Force creation of CFS IndexWriter w3 = new IndexWriter(dir, conf); w3.addIndexes(readers); diff --git a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java index 95da21de23e..378aeb9f5a2 100644 --- a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java +++ b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java @@ -127,9 +127,9 @@ public class TestAtomicUpdate extends LuceneTestCase { TimedThread[] threads = new TimedThread[4]; IndexWriterConfig conf = new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(7); - ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3); + ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3); IndexWriter writer = new MockIndexWriter(directory, conf); writer.setInfoStream(VERBOSE ? System.out : null); diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index f92da6571a6..efee37fce32 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -132,7 +132,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { try { writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); fail("IndexWriter creation should not pass for "+unsupportedNames[i]); } catch (IndexFormatTooOldException e) { // pass @@ -174,7 +174,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Directory dir = newFSDirectory(oldIndxeDir); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.setInfoStream(VERBOSE ? 
System.out : null); w.optimize(); w.close(); @@ -194,7 +194,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Directory targetDir = newDirectory(); IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.addIndexes(dir); w.close(); @@ -215,7 +215,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Directory targetDir = newDirectory(); IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.addIndexes(reader); w.close(); reader.close(); @@ -268,7 +268,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } public void searchIndex(File indexDir, String oldName) throws IOException { - //QueryParser parser = new QueryParser("contents", new MockAnalyzer()); + //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random)); //Query query = parser.parse("handle:1"); Directory dir = newFSDirectory(indexDir); @@ -340,7 +340,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { Directory dir = newFSDirectory(oldIndexDir); // open writer - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.setInfoStream(VERBOSE ? System.out : null); // add 10 docs for(int i=0;i<10;i++) { @@ -385,7 +385,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { searcher.close(); // optimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -430,7 +430,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { searcher.close(); // optimize - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -451,7 +451,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { _TestUtil.rmDir(indexDir); Directory dir = newFSDirectory(indexDir); - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(10); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS); IndexWriter writer = new IndexWriter(dir, conf); @@ -462,7 +462,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { writer.close(); // open fresh writer so we get no prx file in the added segment - conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(10); + conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS); writer = new IndexWriter(dir, conf); addNoProxDoc(writer); @@ -498,7 +498,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { IndexWriter 
writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(-1). setRAMBufferSizeMB(16.0). setMergePolicy(mergePolicy) diff --git a/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java index b8b195f2ea3..04810a3a7dc 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java +++ b/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java @@ -34,7 +34,7 @@ public class TestCheckIndex extends LuceneTestCase { public void testDeletedDocs() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); Document doc = new Document(); doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for(int i=0;i<19;i++) { diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java index 13156a94587..44b84b504ad 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java @@ -322,7 +322,7 @@ public class TestCodecs extends LuceneTestCase { public void testSepPositionAfterMerge() throws IOException { final Directory dir = newDirectory(); final IndexWriterConfig config = newIndexWriterConfig(Version.LUCENE_31, - new MockAnalyzer()); + new MockAnalyzer(random)); config.setCodecProvider(new MockSepCodecs()); final IndexWriter writer = new IndexWriter(dir, config); @@ -593,7 +593,7 @@ public class TestCodecs extends LuceneTestCase { final int termIndexInterval = _TestUtil.nextInt(random, 13, 27); final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false); - final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo, null, new AtomicLong(0)); + final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo, null); final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); Arrays.sort(fields); diff --git a/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java b/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java index 9e4eba610e2..88d499e1359 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java +++ b/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java @@ -57,8 +57,7 @@ public class TestCompoundFile extends LuceneTestCase @Override public void setUp() throws Exception { super.setUp(); - File file = new File(TEMP_DIR, "testIndex"); - _TestUtil.rmDir(file); + File file = _TestUtil.getTempDir("testIndex"); // use a simple FSDir here, to be sure to have SimpleFSInputs dir = new SimpleFSDirectory(file,null); } @@ -66,7 +65,6 @@ public class TestCompoundFile extends LuceneTestCase @Override public void tearDown() throws Exception { dir.close(); - _TestUtil.rmDir(new File(TEMP_DIR, "testIndex")); super.tearDown(); } diff --git a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java index 5e50c968b51..c7e5927ecd9 100644 --- 
a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java +++ b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java @@ -50,7 +50,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { boolean isClose = false; StackTraceElement[] trace = new Exception().getStackTrace(); for (int i = 0; i < trace.length; i++) { - if ("doFlush".equals(trace[i].getMethodName())) { + if ("flush".equals(trace[i].getMethodName())) { isDoFlush = true; } if ("close".equals(trace[i].getMethodName())) { @@ -72,7 +72,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { FailOnlyOnFlush failure = new FailOnlyOnFlush(); directory.failOn(failure); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); writer.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -130,7 +130,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { // start: mp.setMinMergeDocs(1000); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMergePolicy(mp)); writer.setInfoStream(VERBOSE ? System.out : null); @@ -169,7 +169,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { public void testNoExtraFiles() throws IOException { MockDirectoryWrapper directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); writer.setInfoStream(VERBOSE ? System.out : null); @@ -189,7 +189,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { // Reopen writer = new IndexWriter(directory, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(2)); writer.setInfoStream(VERBOSE ? System.out : null); } @@ -207,7 +207,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { IndexWriter writer = new IndexWriter( directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(100)) ); @@ -240,7 +240,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { // Reopen writer = new IndexWriter( directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). 
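Editor's note (not part of the patch): the one-word change above ("doFlush" to "flush") matters because FailOnlyOnFlush decides whether to throw by scanning the current stack trace for a method name. The class below is a simplified, illustrative sketch of that fault-injection pattern (the real FailOnlyOnFlush is not fully visible here); it would be installed with MockDirectoryWrapper.failOn(), as the test above does.

import java.io.IOException;

import org.apache.lucene.store.MockDirectoryWrapper;

// Illustrative sketch: throw an IOException only when the wrapped directory is
// being used from inside a method with the given name.
public class FailWhenCalledFrom extends MockDirectoryWrapper.Failure {
  private final String methodName;
  private volatile boolean armed = true;

  public FailWhenCalledFrom(String methodName) {
    this.methodName = methodName;
  }

  @Override
  public void eval(MockDirectoryWrapper dir) throws IOException {
    if (!armed) {
      return;
    }
    for (StackTraceElement frame : new Exception().getStackTrace()) {
      if (methodName.equals(frame.getMethodName())) {
        armed = false; // fail only once
        throw new IOException("injected failure while inside " + methodName);
      }
    }
  }
}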
setMergePolicy(newLogMergePolicy(100)) ); diff --git a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java index de93f378737..04dcd12a648 100644 --- a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java +++ b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java @@ -35,7 +35,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { public void testSameFieldNumbersAcrossSegments() throws Exception { for (int i = 0; i < 2; i++) { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d1 = new Document(); d1.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.NO)); @@ -44,7 +44,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { if (i == 1) { writer.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); } else { writer.commit(); } @@ -72,7 +72,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { assertEquals("f3", fis2.fieldInfo(2).name); assertEquals("f4", fis2.fieldInfo(3).name); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.optimize(); writer.close(); @@ -96,7 +96,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { public void testAddIndexes() throws Exception { Directory dir1 = newDirectory(); Directory dir2 = newDirectory(); - IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d1 = new Document(); d1.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.NO)); @@ -104,7 +104,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { writer.addDocument(d1); writer.close(); - writer = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + writer = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); Document d2 = new Document(); d2.add(new Field("f2", "second field", Store.YES, Index.ANALYZED, TermVector.NO)); @@ -115,7 +115,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { writer.close(); - writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); writer.addIndexes(dir2); writer.close(); @@ -134,7 +134,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase 
{ assertEquals("f3", fis2.fieldInfo(2).name); assertEquals("f4", fis2.fieldInfo(3).name); - writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.optimize(); writer.close(); @@ -159,7 +159,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { Directory dir = newDirectory(); { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( NoMergePolicy.NO_COMPOUND_FILES)); Document d = new Document(); d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, @@ -180,7 +180,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( random.nextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); @@ -205,7 +205,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( random.nextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); @@ -237,7 +237,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( random.nextBoolean() ? 
NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES)); writer.deleteDocuments(new Term("f1", "d1")); @@ -248,7 +248,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { } IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( new LogByteSizeMergePolicy())); writer.optimize(); assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges); @@ -281,7 +281,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { } Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); diff --git a/lucene/src/test/org/apache/lucene/index/TestCrash.java b/lucene/src/test/org/apache/lucene/index/TestCrash.java index 521632e4a7a..34fd0b80890 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCrash.java +++ b/lucene/src/test/org/apache/lucene/index/TestCrash.java @@ -36,7 +36,7 @@ public class TestCrash extends LuceneTestCase { private IndexWriter initIndex(Random random, MockDirectoryWrapper dir, boolean initialCommit) throws IOException { dir.setLockFactory(NoLockFactory.getNoLockFactory()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(10).setMergeScheduler(new ConcurrentMergeScheduler())); ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); if (initialCommit) { diff --git a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java index 98cb5b50e96..8e71ca8d19e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java @@ -204,7 +204,7 @@ public class TestDeletionPolicy extends LuceneTestCase { Directory dir = newDirectory(); ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(dir, SECONDS); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setIndexDeletionPolicy(policy); MergePolicy mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { @@ -221,7 +221,7 @@ public class TestDeletionPolicy extends LuceneTestCase { // past commits lastDeleteTime = System.currentTimeMillis(); conf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { @@ -303,7 +303,7 @@ public class TestDeletionPolicy extends LuceneTestCase { policy.dir = dir; IndexWriterConfig conf = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(policy).setMaxBufferedDocs(10) .setMergeScheduler(new SerialMergeScheduler()); MergePolicy mp = conf.getMergePolicy(); @@ -324,7 +324,7 @@ public class TestDeletionPolicy extends LuceneTestCase { } if (!isOptimized) { conf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new 
MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { @@ -373,7 +373,7 @@ public class TestDeletionPolicy extends LuceneTestCase { int preCount = dir.listAll().length; writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setIndexDeletionPolicy(policy)); writer.close(); int postCount = dir.listAll().length; @@ -397,7 +397,7 @@ public class TestDeletionPolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setIndexDeletionPolicy(policy). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(10)) @@ -419,7 +419,7 @@ public class TestDeletionPolicy extends LuceneTestCase { assertTrue(lastCommit != null); // Now add 1 doc and optimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(policy)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(policy)); addDoc(writer); assertEquals(11, writer.numDocs()); writer.optimize(); @@ -428,7 +428,7 @@ public class TestDeletionPolicy extends LuceneTestCase { assertEquals(6, IndexReader.listCommits(dir).size()); // Now open writer on the commit just before optimize: - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(policy).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); @@ -441,7 +441,7 @@ public class TestDeletionPolicy extends LuceneTestCase { assertEquals(11, r.numDocs()); r.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(policy).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); // Commits the rollback: @@ -458,7 +458,7 @@ public class TestDeletionPolicy extends LuceneTestCase { r.close(); // Reoptimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(policy)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(policy)); writer.optimize(); writer.close(); @@ -469,7 +469,7 @@ public class TestDeletionPolicy extends LuceneTestCase { // Now open writer on the commit just before optimize, // but this time keeping only the last commit: - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexCommit(lastCommit)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); // Reader still sees optimized index, because writer @@ -505,7 +505,7 @@ public class TestDeletionPolicy extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); MergePolicy mp = conf.getMergePolicy(); @@ 
-518,7 +518,7 @@ public class TestDeletionPolicy extends LuceneTestCase { } writer.close(); - conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy); mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { @@ -558,7 +558,7 @@ public class TestDeletionPolicy extends LuceneTestCase { for(int j=0;j files) throws IOException { + StandardPostingsReader.files(dir, segmentInfo, ""+id, files); + BlockTermsReader.files(dir, segmentInfo, ""+id, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, ""+id, files); + } + + @Override + public void getExtensions(Set extensions) { + getStandardExtensions(extensions); + } + + public static void getStandardExtensions(Set extensions) { + extensions.add(FREQ_EXTENSION); + extensions.add(PROX_EXTENSION); + BlockTermsReader.getExtensions(extensions); + FixedGapTermsIndexReader.getIndexExtensions(extensions); + } + } + + public void testRandom() throws Exception { + MockDirectoryWrapper dir = newDirectory(); + + final int NUM_TERMS = 100 * RANDOM_MULTIPLIER; + final Set terms = new HashSet(); + while(terms.size() < NUM_TERMS) { + final String s = _TestUtil.randomRealisticUnicodeString(random); + //final String s = _TestUtil.randomSimpleString(random); + if (s.length() > 0) { + terms.add(new BytesRef(s)); + } + } + final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); + Arrays.sort(termsArray); + + final int NUM_DOCS = 1000 * RANDOM_MULTIPLIER; + + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + + // Sometimes swap in codec that impls ord(): + if (random.nextInt(10) == 7) { + // Make sure terms index has ords: + CoreCodecProvider cp = new CoreCodecProvider(); + cp.register(new StandardCodecWithOrds()); + cp.setDefaultFieldCodec("StandardOrds"); + + // So checkIndex on close works + dir.setCodecProvider(cp); + conf.setCodecProvider(cp); + } + + final RandomIndexWriter w = new RandomIndexWriter(random, dir, conf); + + final int[][] idToOrds = new int[NUM_DOCS][]; + final Set ordsForDocSet = new HashSet(); + + for(int id=0;id prefixes = new HashSet(); + final int numPrefix = _TestUtil.nextInt(random, 2, 7); + if (VERBOSE) { + System.out.println("TEST: use " + numPrefix + " prefixes"); + } + while(prefixes.size() < numPrefix) { + prefixes.add(_TestUtil.randomRealisticUnicodeString(random)); + //prefixes.add(_TestUtil.randomSimpleString(random)); + } + final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]); + + final int NUM_TERMS = 100 * RANDOM_MULTIPLIER; + final Set terms = new HashSet(); + while(terms.size() < NUM_TERMS) { + final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomRealisticUnicodeString(random); + //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random); + if (s.length() > 0) { + terms.add(new BytesRef(s)); + } + } + final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); + Arrays.sort(termsArray); + + final int NUM_DOCS = 1000 * RANDOM_MULTIPLIER; + + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + + // Sometimes swap in codec that impls ord(): + if (random.nextInt(10) == 7) { + // Make sure terms index has ords: + CoreCodecProvider cp = new CoreCodecProvider(); + cp.register(new StandardCodecWithOrds()); + cp.setDefaultFieldCodec("StandardOrds"); 
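The new randomized-ords test above registers a codec provider on both the MockDirectoryWrapper and the IndexWriterConfig; per the patch's own comment, the directory needs it so CheckIndex, run when the wrapper closes, resolves the same codec names the writer encoded with. A condensed sketch of that wiring, limited to the calls shown in the patch and assuming the surrounding test's imports and its test-local StandardCodecWithOrds class:

MockDirectoryWrapper dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

if (random.nextInt(10) == 7) {                // occasionally exercise a terms dict with ord support
  CoreCodecProvider cp = new CoreCodecProvider();
  cp.register(new StandardCodecWithOrds());   // test-local codec whose terms index keeps ords
  cp.setDefaultFieldCodec("StandardOrds");
  dir.setCodecProvider(cp);                   // so CheckIndex on close can decode the segments
  conf.setCodecProvider(cp);                  // so the writer encodes with the same codec
}
RandomIndexWriter w = new RandomIndexWriter(random, dir, conf);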
+ + // So checkIndex on close works + dir.setCodecProvider(cp); + conf.setCodecProvider(cp); + } + + final RandomIndexWriter w = new RandomIndexWriter(random, dir, conf); + + final int[][] idToOrds = new int[NUM_DOCS][]; + final Set ordsForDocSet = new HashSet(); + + for(int id=0;id uniqueValues = new HashSet(); + for (int j = 0; j < ids.length; j++) { + Integer i = ids[j]; + // create an array here since we compare identity below against tailItem + Term[] term = new Term[] {template.createTerm(i.toString())}; + uniqueValues.add(term[0]); + queue.addDelete(term); + if (random.nextInt(20) == 0 || j == ids.length - 1) { + queue.updateSlice(slice1); + assertTrue(slice1.isTailItem(term)); + slice1.apply(bd1, j); + assertAllBetween(last1, j, bd1, ids); + last1 = j + 1; + } + if (random.nextInt(10) == 5 || j == ids.length - 1) { + queue.updateSlice(slice2); + assertTrue(slice2.isTailItem(term)); + slice2.apply(bd2, j); + assertAllBetween(last2, j, bd2, ids); + last2 = j + 1; + } + assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes()); + } + assertEquals(uniqueValues, bd1.terms.keySet()); + assertEquals(uniqueValues, bd2.terms.keySet()); + assertEquals(uniqueValues, new HashSet(Arrays.asList(queue + .freezeGlobalBuffer(null).terms))); + assertEquals("num deletes must be 0 after freeze", 0, queue + .numGlobalTermDeletes()); + } + + private void assertAllBetween(int start, int end, BufferedDeletes deletes, + Integer[] ids) { + Term template = new Term("id"); + for (int i = start; i <= end; i++) { + assertEquals(Integer.valueOf(end), deletes.terms.get(template + .createTerm(ids[i].toString()))); + } + } + + public void testClear() { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Term template = new Term("id"); + assertFalse(queue.anyChanges()); + queue.clear(); + assertFalse(queue.anyChanges()); + final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER; + int termsSinceFreeze = 0; + int queriesSinceFreeze = 0; + for (int i = 0; i < size; i++) { + Term term = template.createTerm("" + i); + if (random.nextInt(10) == 0) { + queue.addDelete(new TermQuery(term)); + queriesSinceFreeze++; + } else { + queue.addDelete(term); + termsSinceFreeze++; + } + assertTrue(queue.anyChanges()); + if (random.nextInt(10) == 0) { + queue.clear(); + queue.tryApplyGlobalSlice(); + assertFalse(queue.anyChanges()); + } + } + + } + + public void testAnyChanges() { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Term template = new Term("id"); + final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER; + int termsSinceFreeze = 0; + int queriesSinceFreeze = 0; + for (int i = 0; i < size; i++) { + Term term = template.createTerm("" + i); + if (random.nextInt(10) == 0) { + queue.addDelete(new TermQuery(term)); + queriesSinceFreeze++; + } else { + queue.addDelete(term); + termsSinceFreeze++; + } + assertTrue(queue.anyChanges()); + if (random.nextInt(5) == 0) { + FrozenBufferedDeletes freezeGlobalBuffer = queue + .freezeGlobalBuffer(null); + assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.length); + assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length); + queriesSinceFreeze = 0; + termsSinceFreeze = 0; + assertFalse(queue.anyChanges()); + } + } + } + + public void testStressDeleteQueue() throws InterruptedException { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Set uniqueValues = new HashSet(); + final int size = 10000 + random.nextInt(500) * RANDOM_MULTIPLIER; + Integer[] ids = new Integer[size]; + Term 
template = new Term("id"); + for (int i = 0; i < ids.length; i++) { + ids[i] = random.nextInt(); + uniqueValues.add(template.createTerm(ids[i].toString())); + } + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger index = new AtomicInteger(0); + final int numThreads = 2 + random.nextInt(5); + UpdateThread[] threads = new UpdateThread[numThreads]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new UpdateThread(queue, index, ids, latch); + threads[i].start(); + } + latch.countDown(); + for (int i = 0; i < threads.length; i++) { + threads[i].join(); + } + + for (UpdateThread updateThread : threads) { + DeleteSlice slice = updateThread.slice; + queue.updateSlice(slice); + BufferedDeletes deletes = updateThread.deletes; + slice.apply(deletes, BufferedDeletes.MAX_INT); + assertEquals(uniqueValues, deletes.terms.keySet()); + } + queue.tryApplyGlobalSlice(); + assertEquals(uniqueValues, new HashSet(Arrays.asList(queue + .freezeGlobalBuffer(null).terms))); + assertEquals("num deletes must be 0 after freeze", 0, queue + .numGlobalTermDeletes()); + } + + private static class UpdateThread extends Thread { + final DocumentsWriterDeleteQueue queue; + final AtomicInteger index; + final Integer[] ids; + final DeleteSlice slice; + final BufferedDeletes deletes; + final CountDownLatch latch; + + protected UpdateThread(DocumentsWriterDeleteQueue queue, + AtomicInteger index, Integer[] ids, CountDownLatch latch) { + this.queue = queue; + this.index = index; + this.ids = ids; + this.slice = queue.newSlice(); + deletes = new BufferedDeletes(false); + this.latch = latch; + } + + @Override + public void run() { + try { + latch.await(); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + Term template = new Term("id"); + int i = 0; + while ((i = index.getAndIncrement()) < ids.length) { + Term term = template.createTerm(ids[i].toString()); + queue.add(term, slice); + assertTrue(slice.isTailItem(term)); + slice.apply(deletes, BufferedDeletes.MAX_INT); + } + } + } + +} diff --git a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java index e063de266dc..26b1717072f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -51,7 +51,7 @@ public class TestFieldsReader extends LuceneTestCase { DocHelper.setupDoc(testDoc); _TestUtil.add(testDoc, fieldInfos); dir = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, conf); writer.addDocument(testDoc); @@ -286,12 +286,11 @@ public class TestFieldsReader extends LuceneTestCase { */ public void testLazyPerformance() throws Exception { String userName = System.getProperty("user.name"); - File file = new File(TEMP_DIR, "lazyDir" + userName); - _TestUtil.rmDir(file); + File file = _TestUtil.getTempDir("lazyDir" + userName); Directory tmpDir = newFSDirectory(file); assertTrue(tmpDir != null); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new 
MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(tmpDir, conf); writer.addDocument(testDoc); @@ -473,12 +472,12 @@ public class TestFieldsReader extends LuceneTestCase { // LUCENE-1262 public void testExceptions() throws Throwable { - File indexDir = new File(TEMP_DIR, "testfieldswriterexceptions"); + File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions"); try { Directory dir = new FaultyFSDirectory(indexDir); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); for(int i=0;i<2;i++) writer.addDocument(testDoc); writer.optimize(); diff --git a/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java index c17dc38b9aa..7d01ae079c1 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java @@ -126,7 +126,7 @@ public class TestFilterIndexReader extends LuceneTestCase { */ public void testFilterIndexReader() throws Exception { Directory directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d1 = new Document(); d1.add(newField("default","one two", Field.Store.YES, Field.Index.ANALYZED)); @@ -143,7 +143,7 @@ public class TestFilterIndexReader extends LuceneTestCase { writer.close(); Directory target = newDirectory(); - writer = new IndexWriter(target, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(target, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); IndexReader reader = new TestReader(IndexReader.open(directory, true)); writer.addIndexes(reader); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestFlex.java b/lucene/src/test/org/apache/lucene/index/TestFlex.java index 7a55f34e770..6a5c1804734 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFlex.java +++ b/lucene/src/test/org/apache/lucene/index/TestFlex.java @@ -32,7 +32,7 @@ public class TestFlex extends LuceneTestCase { IndexWriter w = new IndexWriter( d, - new IndexWriterConfig(Version.LUCENE_31, new MockAnalyzer()). + new IndexWriterConfig(Version.LUCENE_31, new MockAnalyzer(random)). 
setMaxBufferedDocs(7) ); @@ -64,7 +64,7 @@ public class TestFlex extends LuceneTestCase { public void testTermOrd() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); + new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); Document doc = new Document(); doc.add(newField("f", "a b c", Field.Store.NO, Field.Index.ANALYZED)); w.addDocument(doc); diff --git a/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java b/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java new file mode 100644 index 00000000000..7eb72130d3f --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java @@ -0,0 +1,424 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.LineFileDocs; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; +import org.junit.Before; + +public class TestFlushByRamOrCountsPolicy extends LuceneTestCase { + + private LineFileDocs lineDocFile; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + lineDocFile = new LineFileDocs(random); + } + + public void testFlushByRam() throws CorruptIndexException, + LockObtainFailedException, IOException, InterruptedException { + int[] numThreads = new int[] { 3 + random.nextInt(12), 1 }; + for (int i = 0; i < numThreads.length; i++) { + runFlushByRam(numThreads[i], + 1 + random.nextInt(10) + random.nextDouble(), false); + } + + for (int i = 0; i < numThreads.length; i++) { + // with a 256 mb ram buffer we should never stall + runFlushByRam(numThreads[i], 256.d, true); + } + } + + protected void runFlushByRam(int numThreads, double maxRamMB, + boolean ensureNotStalled) throws IOException, CorruptIndexException, + LockObtainFailedException, InterruptedException { + final int numDocumentsToIndex = 50 + random.nextInt(150); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new 
MockAnalyzer(random)).setFlushPolicy(flushPolicy); + final int numDWPT = 1 + random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + iwc.setRAMBufferSizeMB(maxRamMB); + iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + IndexWriter writer = new IndexWriter(dir, iwc); + assertFalse(flushPolicy.flushOnDocCount()); + assertFalse(flushPolicy.flushOnDeleteTerms()); + assertTrue(flushPolicy.flushOnRAM()); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, + false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.); + assertEquals(" all flushes must be due numThreads=" + numThreads, 0, + flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); + assertActiveBytesAfter(flushControl); + if (flushPolicy.hasMarkedPending) { + assertTrue(maxRAMBytes < flushControl.peakActiveBytes); + } + if (ensureNotStalled) { + assertFalse(docsWriter.healthiness.wasStalled); + } + writer.close(); + assertEquals(0, flushControl.activeBytes()); + dir.close(); + } + + public void testFlushDocCount() throws CorruptIndexException, + LockObtainFailedException, IOException, InterruptedException { + int[] numThreads = new int[] { 3 + random.nextInt(12), 1 }; + for (int i = 0; i < numThreads.length; i++) { + + final int numDocumentsToIndex = 50 + random.nextInt(150); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setFlushPolicy(flushPolicy); + + final int numDWPT = 1 + random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + iwc.setMaxBufferedDocs(2 + random.nextInt(50)); + iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + IndexWriter writer = new IndexWriter(dir, iwc); + assertTrue(flushPolicy.flushOnDocCount()); + assertFalse(flushPolicy.flushOnDeleteTerms()); + assertFalse(flushPolicy.flushOnRAM()); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads[i]]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads[i], writer, + lineDocFile, false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + + assertEquals(" all flushes must be due numThreads=" + 
numThreads[i], 0, + flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs()); + assertActiveBytesAfter(flushControl); + writer.close(); + assertEquals(0, flushControl.activeBytes()); + dir.close(); + } + } + + public void testRandom() throws IOException, InterruptedException { + final int numThreads = 1 + random.nextInt(8); + final int numDocumentsToIndex = 100 + random.nextInt(300); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + iwc.setFlushPolicy(flushPolicy); + + final int numDWPT = 1 + random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + + IndexWriter writer = new IndexWriter(dir, iwc); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, + true); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + assertEquals(" all flushes must be due", 0, flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + if (flushPolicy.flushOnRAM() && !flushPolicy.flushOnDocCount() + && !flushPolicy.flushOnDeleteTerms()) { + final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. 
* 1024.); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); + if (flushPolicy.hasMarkedPending) { + assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, + maxRAMBytes <= flushControl.peakActiveBytes); + } + } + assertActiveBytesAfter(flushControl); + writer.commit(); + assertEquals(0, flushControl.activeBytes()); + IndexReader r = IndexReader.open(dir); + assertEquals(numDocumentsToIndex, r.numDocs()); + assertEquals(numDocumentsToIndex, r.maxDoc()); + if (!flushPolicy.flushOnRAM()) { + assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled); + assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked()); + } + r.close(); + writer.close(); + dir.close(); + } + + public void testHealthyness() throws InterruptedException, + CorruptIndexException, LockObtainFailedException, IOException { + + int[] numThreads = new int[] { 4 + random.nextInt(8), 1 }; + final int numDocumentsToIndex = 50 + random.nextInt(50); + for (int i = 0; i < numThreads.length; i++) { + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + MockDirectoryWrapper dir = newDirectory(); + // mock a very slow harddisk here so that flushing is very slow + dir.setThrottledIndexOutput(new ThrottledIndexOutput(ThrottledIndexOutput + .mBitsToBytes(40 + random.nextInt(10)), 5 + random.nextInt(5), null)); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); + iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy(); + iwc.setFlushPolicy(flushPolicy); + + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numThreads[i]== 1 ? 
1 : 2); + iwc.setIndexerThreadPool(threadPool); + // with such a small ram buffer we should be stalled quiet quickly + iwc.setRAMBufferSizeMB(0.25); + IndexWriter writer = new IndexWriter(dir, iwc); + IndexThread[] threads = new IndexThread[numThreads[i]]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads[i], writer, + lineDocFile, false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" all flushes must be due", 0, flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + if (numThreads[i] == 1) { + assertFalse( + "single thread must not stall", + docsWriter.healthiness.wasStalled); + assertFalse( + "single thread must not block numThreads: " + numThreads[i], + docsWriter.healthiness.hasBlocked()); + } + assertActiveBytesAfter(flushControl); + writer.close(true); + dir.close(); + } + } + + protected void assertActiveBytesAfter(DocumentsWriterFlushControl flushControl) { + Iterator allActiveThreads = flushControl.allActiveThreads(); + long bytesUsed = 0; + while (allActiveThreads.hasNext()) { + bytesUsed += allActiveThreads.next().perThread.bytesUsed(); + } + assertEquals(bytesUsed, flushControl.activeBytes()); + } + + public class IndexThread extends Thread { + IndexWriter writer; + IndexWriterConfig iwc; + LineFileDocs docs; + private AtomicInteger pendingDocs; + private final boolean doRandomCommit; + + public IndexThread(AtomicInteger pendingDocs, int numThreads, + IndexWriter writer, LineFileDocs docs, boolean doRandomCommit) { + this.pendingDocs = pendingDocs; + this.writer = writer; + iwc = writer.getConfig(); + this.docs = docs; + this.doRandomCommit = doRandomCommit; + } + + public void run() { + try { + long ramSize = 0; + while (pendingDocs.decrementAndGet() > -1) { + Document doc = docs.nextDoc(); + writer.addDocument(doc); + long newRamSize = writer.ramSizeInBytes(); + if (newRamSize != ramSize) { + ramSize = newRamSize; + } + if (doRandomCommit) { + int commit; + synchronized (random) { + commit = random.nextInt(20); + } + if (commit == 0) { + writer.commit(); + } + } + } + writer.commit(); + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } + } + + private static class MockDefaultFlushPolicy extends FlushByRamOrCountsPolicy { + long peakBytesWithoutFlush = Integer.MIN_VALUE; + long peakDocCountWithoutFlush = Integer.MIN_VALUE; + boolean hasMarkedPending = false; + + @Override + public void onDelete(DocumentsWriterFlushControl control, ThreadState state) { + final ArrayList pending = new ArrayList(); + final ArrayList notPending = new ArrayList(); + findPending(control, pending, notPending); + final boolean flushCurrent = state.flushPending; + final ThreadState toFlush; + if (state.flushPending) { + toFlush = state; + } else if (flushOnDeleteTerms() + && state.perThread.pendingDeletes.numTermDeletes.get() >= indexWriterConfig + .getMaxBufferedDeleteTerms()) { + toFlush = state; + } else { + toFlush = null; + } + super.onDelete(control, state); + if (toFlush != null) { + if (flushCurrent) { + assertTrue(pending.remove(toFlush)); + } else { + assertTrue(notPending.remove(toFlush)); + } + assertTrue(toFlush.flushPending); + hasMarkedPending = true; + } + + for (ThreadState threadState : notPending) { + 
assertFalse(threadState.flushPending); + } + } + + @Override + public void onInsert(DocumentsWriterFlushControl control, ThreadState state) { + final ArrayList pending = new ArrayList(); + final ArrayList notPending = new ArrayList(); + findPending(control, pending, notPending); + final boolean flushCurrent = state.flushPending; + long activeBytes = control.activeBytes(); + final ThreadState toFlush; + if (state.flushPending) { + toFlush = state; + } else if (flushOnDocCount() + && state.perThread.getNumDocsInRAM() >= indexWriterConfig + .getMaxBufferedDocs()) { + toFlush = state; + } else if (flushOnRAM() + && activeBytes >= (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024. * 1024.)) { + toFlush = findLargestNonPendingWriter(control, state); + assertFalse(toFlush.flushPending); + } else { + toFlush = null; + } + super.onInsert(control, state); + if (toFlush != null) { + if (flushCurrent) { + assertTrue(pending.remove(toFlush)); + } else { + assertTrue(notPending.remove(toFlush)); + } + assertTrue(toFlush.flushPending); + hasMarkedPending = true; + } else { + peakBytesWithoutFlush = Math.max(activeBytes, peakBytesWithoutFlush); + peakDocCountWithoutFlush = Math.max(state.perThread.getNumDocsInRAM(), + peakDocCountWithoutFlush); + } + + for (ThreadState threadState : notPending) { + assertFalse(threadState.flushPending); + } + } + } + + static void findPending(DocumentsWriterFlushControl flushControl, + ArrayList pending, ArrayList notPending) { + Iterator allActiveThreads = flushControl.allActiveThreads(); + while (allActiveThreads.hasNext()) { + ThreadState next = allActiveThreads.next(); + if (next.flushPending) { + pending.add(next); + } else { + notPending.add(next); + } + } + } +} diff --git a/lucene/src/test/org/apache/lucene/index/TestGlobalFieldNumbers.java b/lucene/src/test/org/apache/lucene/index/TestGlobalFieldNumbers.java index 9fd8357c320..8bb9a6b02e3 100644 --- a/lucene/src/test/org/apache/lucene/index/TestGlobalFieldNumbers.java +++ b/lucene/src/test/org/apache/lucene/index/TestGlobalFieldNumbers.java @@ -46,7 +46,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { Directory dir = newDirectory(); { IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()); + new MockAnalyzer(random)); IndexWriter writer = new IndexWriter(dir, config); Document d = new Document(); d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, @@ -83,7 +83,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); d.add(new Field("f1", "d3 first field", Store.YES, Index.ANALYZED, TermVector.NO)); @@ -102,7 +102,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { } IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.optimize(); assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges); writer.close(); @@ -117,7 +117,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { Directory dir = newDirectory(); { IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()); + new MockAnalyzer(random)); IndexWriter writer = new IndexWriter(dir, config); Document d = new Document(); d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, @@ -145,7 +145,7 @@ public 
class TestGlobalFieldNumbers extends LuceneTestCase { assertFNXFiles(dir, "2.fnx"); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.optimize(); assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges); writer.close(); @@ -160,7 +160,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { Directory dir = newDirectory(); { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy( new KeepAllDeletionPolicy())); Document d = new Document(); d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, @@ -185,7 +185,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); d.add(new Field("f1", "d3 first field", Store.YES, Index.ANALYZED, TermVector.NO)); @@ -197,7 +197,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { assertFNXFiles(dir, "2.fnx"); } IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.optimize(); assertFalse(" field numbers got mixed up", writer.anyNonBulkMerges); writer.close(); @@ -210,7 +210,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { for (int i = 0; i < 39; i++) { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy( new KeepAllDeletionPolicy())); Document d = new Document(); d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, @@ -232,7 +232,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { List listCommits = IndexReader.listCommits(dir); assertEquals(2, listCommits.size()); writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setIndexDeletionPolicy( + new MockAnalyzer(random)).setIndexDeletionPolicy( new KeepAllDeletionPolicy()).setIndexCommit(listCommits.get(0))); d = new Document(); @@ -247,7 +247,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { assertFNXFiles(dir, "1.fnx", "2.fnx", "3.fnx"); writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer())); + new MockAnalyzer(random))); writer.commit(); listCommits = IndexReader.listCommits(dir); assertEquals(1, listCommits.size()); @@ -290,9 +290,9 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { } Directory base = buildRandomIndex(fieldNames.toArray(new String[0]), 20 + random.nextInt(100), - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); IndexWriter writer = new IndexWriter(base, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); FieldNumberBiMap globalFieldMap = writer.segmentInfos .getOrLoadGlobalFieldNumberMap(base); Set> entries = globalFieldMap.entries(); @@ -315,7 +315,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { Directory base = newDirectory(); IndexWriter writer = new 
IndexWriter(base, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); for (String string : fieldNames) { doc.add(newField(string, @@ -339,9 +339,9 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { for (int j = 0; j < numIndexes; j++) { Directory toAdd = buildRandomIndex(fieldNames.toArray(new String[0]), 1 + random.nextInt(50), - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); IndexWriter w = new IndexWriter(base, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); if (random.nextBoolean()) { IndexReader open = IndexReader.open(toAdd); w.addIndexes(open); @@ -357,7 +357,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { toAdd.close(); } IndexWriter w = new IndexWriter(base, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( new LogByteSizeMergePolicy())); w.optimize(); w.close(); @@ -402,7 +402,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { } Directory base = newDirectory(); IndexWriter writer = new IndexWriter(base, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy( NoMergePolicy.NO_COMPOUND_FILES)); SortedMap copySortedMap = new TreeMap( @@ -428,7 +428,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { writer.close(); writer = new IndexWriter(base, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); + new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); writer.commit(); // make sure the old index is the latest segment writer.close(); @@ -459,7 +459,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { .unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir); dir = newFSDirectory(oldIndxeDir); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(policy)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(policy)); SegmentInfos segmentInfos = writer.segmentInfos; assertTrue(DefaultSegmentInfosWriter.FORMAT_4_0 < segmentInfos.getFormat()); assertEquals(0, segmentInfos.getGlobalFieldMapVersion()); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index d075fc37620..21525d8ed76 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -48,7 +48,7 @@ public class TestIndexFileDeleter extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(10). 
setMergePolicy(mergePolicy) ); @@ -152,7 +152,7 @@ public class TestIndexFileDeleter extends LuceneTestCase { // Open & close a writer: it should delete the above 4 // files and nothing more: - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.close(); String[] files2 = dir.listAll(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 2562e5c5ce4..7f7fa8bf34f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -69,7 +69,7 @@ public class TestIndexReader extends LuceneTestCase // set up writer IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); for(int i=0;i<27;i++) addDocumentWithFields(writer); @@ -92,7 +92,7 @@ public class TestIndexReader extends LuceneTestCase // Change the index writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setMaxBufferedDocs(2)); for(int i=0;i<7;i++) addDocumentWithFields(writer); @@ -104,7 +104,7 @@ public class TestIndexReader extends LuceneTestCase r3.close(); writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -119,7 +119,7 @@ public class TestIndexReader extends LuceneTestCase public void testIsCurrent() throws Exception { Directory d = newDirectory(); IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); writer.close(); // set up reader: @@ -127,13 +127,13 @@ public class TestIndexReader extends LuceneTestCase assertTrue(reader.isCurrent()); // modify index by adding another document: writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); addDocumentWithFields(writer); writer.close(); assertFalse(reader.isCurrent()); // re-create index: writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); addDocumentWithFields(writer); writer.close(); assertFalse(reader.isCurrent()); @@ -150,7 +150,7 @@ public class TestIndexReader extends LuceneTestCase // set up writer IndexWriter writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) ); Document doc = new Document(); @@ -172,7 +172,7 @@ public class TestIndexReader extends LuceneTestCase // add more documents writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). 
setMergePolicy(newLogMergePolicy()) ); @@ -271,7 +271,7 @@ public class TestIndexReader extends LuceneTestCase // set up writer IndexWriter writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy()) ); // want to get some more segments here @@ -330,7 +330,7 @@ public class TestIndexReader extends LuceneTestCase Term searchTerm = new Term("content", "aaa"); // add 100 documents with term : aaa - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.setInfoStream(VERBOSE ? System.out : null); for (int i = 0; i < 100; i++) { addDoc(writer, searchTerm.text()); @@ -371,7 +371,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); @@ -380,7 +380,7 @@ public class TestIndexReader extends LuceneTestCase addDocumentWithTermVectorFields(writer); } writer.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(new Field("bin1", bin)); doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED)); @@ -417,7 +417,7 @@ public class TestIndexReader extends LuceneTestCase // force optimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); writer.optimize(); writer.close(); reader = IndexReader.open(dir, false); @@ -446,7 +446,7 @@ public class TestIndexReader extends LuceneTestCase Term searchTerm = new Term("content", "aaa"); // add 11 documents with term : aaa - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 11; i++) { addDoc(writer, searchTerm.text()); } @@ -489,7 +489,7 @@ public class TestIndexReader extends LuceneTestCase Term searchTerm = new Term("content", "aaa"); // add 11 documents with term : aaa - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); for (int i = 0; i < 11; i++) { addDoc(writer, searchTerm.text()); @@ -532,7 +532,7 @@ public class TestIndexReader extends LuceneTestCase Term searchTerm = new Term("content", "aaa"); // add 1 documents with term : aaa - 
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(writer, searchTerm.text()); writer.close(); @@ -577,7 +577,7 @@ public class TestIndexReader extends LuceneTestCase // add 1 documents with term : aaa writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy(false)) ); addDoc(writer, searchTerm.text()); @@ -632,7 +632,7 @@ public class TestIndexReader extends LuceneTestCase Term searchTerm2 = new Term("content", "bbb"); // add 100 documents with term : aaa - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); for (int i = 0; i < 100; i++) { addDoc(writer, searchTerm.text()); } @@ -647,7 +647,7 @@ public class TestIndexReader extends LuceneTestCase assertTermDocsCount("first reader", reader, searchTerm2, 0); // add 100 documents with term : bbb - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); for (int i = 0; i < 100; i++) { addDoc(writer, searchTerm2.text()); } @@ -708,7 +708,7 @@ public class TestIndexReader extends LuceneTestCase // Create initial data set File dirFile = _TestUtil.getTempDir("TestIndexReader.testFilesOpenClose"); Directory dir = newFSDirectory(dirFile); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(writer, "test"); writer.close(); dir.close(); @@ -718,7 +718,7 @@ public class TestIndexReader extends LuceneTestCase dir = newFSDirectory(dirFile); // Now create the data set again, just as before - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); addDoc(writer, "test"); writer.close(); dir.close(); @@ -738,7 +738,7 @@ public class TestIndexReader extends LuceneTestCase for(int i=0;i<2;i++) { final Directory dir = newDirectory(); assertFalse(IndexReader.indexExists(dir)); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); addDocumentWithFields(writer); assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked writer.close(); @@ -755,7 +755,7 @@ public class TestIndexReader extends LuceneTestCase // incremented: Thread.sleep(1000); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); 
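These TestIndexReader hunks repeatedly exercise the reader-staleness contract: a reader opened before a change reports isCurrent() == true until another writer commits against the same directory. A compact sketch of that contract, assuming the same LuceneTestCase helpers and the test-local addDocumentWithFields used throughout this patch:

Directory d = newDirectory();
IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(
    TEST_VERSION_CURRENT, new MockAnalyzer(random)));
addDocumentWithFields(writer);          // test-local helper, as in TestIndexReader
writer.close();

IndexReader reader = IndexReader.open(d, false);
assertTrue(reader.isCurrent());         // nothing has changed since the reader was opened

writer = new IndexWriter(d, newIndexWriterConfig(
    TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
addDocumentWithFields(writer);
writer.close();                         // commits a new segment
assertFalse(reader.isCurrent());        // the old reader now lags the index

reader.close();
d.close();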
addDocumentWithFields(writer); writer.close(); reader = IndexReader.open(dir, false); @@ -768,7 +768,7 @@ public class TestIndexReader extends LuceneTestCase public void testVersion() throws IOException { Directory dir = newDirectory(); assertFalse(IndexReader.indexExists(dir)); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked writer.close(); @@ -779,7 +779,7 @@ public class TestIndexReader extends LuceneTestCase reader.close(); // modify index and check version has been // incremented: - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); addDocumentWithFields(writer); writer.close(); reader = IndexReader.open(dir, false); @@ -790,10 +790,10 @@ public class TestIndexReader extends LuceneTestCase public void testLock() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); writer.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); IndexReader reader = IndexReader.open(dir, false); try { reader.deleteDocument(0); @@ -814,7 +814,7 @@ public class TestIndexReader extends LuceneTestCase public void testUndeleteAll() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); addDocumentWithFields(writer); writer.close(); @@ -831,7 +831,7 @@ public class TestIndexReader extends LuceneTestCase public void testUndeleteAllAfterClose() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); addDocumentWithFields(writer); writer.close(); @@ -847,7 +847,7 @@ public class TestIndexReader extends LuceneTestCase public void testUndeleteAllAfterCloseThenReopen() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); addDocumentWithFields(writer); writer.close(); @@ -883,7 +883,7 @@ public class TestIndexReader extends LuceneTestCase // First build up a starting index: MockDirectoryWrapper startDir = newDirectory(); - IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + 
IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); if (VERBOSE) { System.out.println("TEST: create initial index"); writer.setInfoStream(System.out); @@ -1067,7 +1067,7 @@ public class TestIndexReader extends LuceneTestCase public void testDocsOutOfOrderJIRA140() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for(int i=0;i<11;i++) { addDoc(writer, "aaa"); } @@ -1085,7 +1085,7 @@ public class TestIndexReader extends LuceneTestCase } reader.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); // We must add more docs to get a new segment written for(int i=0;i<11;i++) { @@ -1107,7 +1107,7 @@ public class TestIndexReader extends LuceneTestCase public void testExceptionReleaseWriteLockJIRA768() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(writer, "aaa"); writer.close(); @@ -1139,7 +1139,7 @@ public class TestIndexReader extends LuceneTestCase } public void testOpenReaderAfterDelete() throws IOException { - File dirFile = new File(TEMP_DIR, "deletetest"); + File dirFile = _TestUtil.getTempDir("deletetest"); Directory dir = newFSDirectory(dirFile); try { IndexReader.open(dir, false); @@ -1163,7 +1163,7 @@ public class TestIndexReader extends LuceneTestCase public void testMultiReaderDeletes() throws Exception { Directory dir = newDirectory(); - RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED)); w.addDocument(doc); @@ -1199,7 +1199,7 @@ public class TestIndexReader extends LuceneTestCase // add 100 documents with term : aaa // add 100 documents with term : bbb // add 100 documents with term : ccc - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); for (int i = 0; i < 100; i++) { addDoc(writer, searchTerm1.text()); addDoc(writer, searchTerm2.text()); @@ -1421,7 +1421,7 @@ public class TestIndexReader extends LuceneTestCase // set up writer IndexWriter writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). 
setMergePolicy(newLogMergePolicy(10)) ); @@ -1441,7 +1441,7 @@ public class TestIndexReader extends LuceneTestCase // Change the index writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(10)) @@ -1456,7 +1456,7 @@ public class TestIndexReader extends LuceneTestCase r2.close(); writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -1472,7 +1472,7 @@ public class TestIndexReader extends LuceneTestCase public void testReadOnly() throws Throwable { Directory d = newDirectory(); IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); writer.commit(); addDocumentWithFields(writer); @@ -1488,7 +1488,7 @@ public class TestIndexReader extends LuceneTestCase writer = new IndexWriter( d, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.APPEND). setMergePolicy(newLogMergePolicy(10)) ); @@ -1509,7 +1509,7 @@ public class TestIndexReader extends LuceneTestCase } writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -1530,7 +1530,7 @@ public class TestIndexReader extends LuceneTestCase // Make sure write lock isn't held writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()) + new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); writer.close(); @@ -1543,7 +1543,7 @@ public class TestIndexReader extends LuceneTestCase public void testIndexReader() throws Exception { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.addDocument(createDocument("a")); writer.addDocument(createDocument("b")); writer.addDocument(createDocument("c")); @@ -1562,7 +1562,7 @@ public class TestIndexReader extends LuceneTestCase MockDirectoryWrapper dir = newDirectory(); dir.setPreventDoubleWrite(false); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.addDocument(createDocument("a")); writer.addDocument(createDocument("b")); writer.addDocument(createDocument("c")); @@ -1604,7 +1604,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); writer.addDocument(createDocument("a")); writer.addDocument(createDocument("a")); @@ -1628,7 +1628,7 @@ public class TestIndexReader extends LuceneTestCase // reuse the doc values arrays in FieldCache public void testFieldCacheReuseAfterClone() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); @@ -1661,7 +1661,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy(10)) ); Document doc = new Document(); @@ -1697,7 +1697,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(-1). setMergePolicy(newLogMergePolicy(10)) ); @@ -1741,7 +1741,7 @@ public class TestIndexReader extends LuceneTestCase // LUCENE-1586: getUniqueTermCount public void testUniqueTermCount() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); Document doc = new Document(); doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); @@ -1774,7 +1774,7 @@ public class TestIndexReader extends LuceneTestCase // LUCENE-1609: don't load terms index public void testNoTermsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); Document doc = new Document(); doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); @@ -1793,7 +1793,7 @@ public class TestIndexReader extends LuceneTestCase assertEquals(-1, ((SegmentReader) r.getSequentialSubReaders()[0]).getTermInfosIndexDivisor()); writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setCodecProvider(_TestUtil.alwaysCodec("Standard")). 
setMergePolicy(newLogMergePolicy(10)) ); @@ -1821,7 +1821,7 @@ public class TestIndexReader extends LuceneTestCase public void testPrepareCommitIsCurrent() throws Throwable { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); Document doc = new Document(); writer.addDocument(doc); @@ -1866,7 +1866,7 @@ public class TestIndexReader extends LuceneTestCase // LUCENE-2812 public void testIndexExists() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.addDocument(new Document()); writer.prepareCommit(); assertFalse(IndexReader.indexExists(dir)); @@ -1879,7 +1879,7 @@ public class TestIndexReader extends LuceneTestCase // dict cache public void testTotalTermFreqCached() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); d.add(newField("f", "a a b", Field.Index.ANALYZED)); writer.addDocument(d); @@ -1901,7 +1901,7 @@ public class TestIndexReader extends LuceneTestCase // LUCENE-2474 public void testReaderFinishedListener() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3); writer.setInfoStream(VERBOSE ? System.out : null); writer.addDocument(new Document()); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java index 3cd7ac07eca..c26dd34aa92 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java @@ -199,7 +199,7 @@ public class TestIndexReaderClone extends LuceneTestCase { TestIndexReaderReopen.createIndex(random, dir1, true); IndexReader reader1 = IndexReader.open(dir1, false); IndexWriter w = new IndexWriter(dir1, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.optimize(); w.close(); IndexReader reader2 = reader1.clone(true); @@ -496,7 +496,7 @@ public class TestIndexReaderClone extends LuceneTestCase { final Directory dir = newDirectory(); IndexWriter w = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
setMergePolicy(newLogMergePolicy(false)) ); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index 9ff5847d3de..34064dd9f28 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -75,7 +75,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); similarityProviderOne = new SimilarityProviderOne(); - anlzr = new MockAnalyzer(); + anlzr = new MockAnalyzer(random); } /** diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index e30f7d61ca2..f55a0c7e2c1 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -173,8 +173,8 @@ public class TestIndexReaderReopen extends LuceneTestCase { private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException { IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode( - OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode( + OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newLogMergePolicy())); iwriter.commit(); IndexReader reader = IndexReader.open(dir, false); try { @@ -700,7 +700,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { final Directory dir = newDirectory(); final int n = 30 * RANDOM_MULTIPLIER; IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < n; i++) { writer.addDocument(createDocument(i, 3)); } @@ -721,7 +721,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { modifier.close(); } else { IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); modifier.addDocument(createDocument(n + i, 6)); modifier.close(); } @@ -937,7 +937,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { public static void createIndex(Random random, Directory dir, boolean multiSegment) throws IOException { IndexWriter.unlock(dir); IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMergePolicy(new LogDocMergePolicy())); for (int i = 0; i < 100; i++) { @@ -984,7 +984,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: modify index"); } - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.setInfoStream(VERBOSE ? 
System.out : null); w.deleteDocuments(new Term("field2", "a11")); w.deleteDocuments(new Term("field2", "b30")); @@ -1001,13 +1001,13 @@ public class TestIndexReaderReopen extends LuceneTestCase { break; } case 2: { - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.optimize(); w.close(); break; } case 3: { - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.addDocument(createDocument(101, 4)); w.optimize(); w.addDocument(createDocument(102, 4)); @@ -1024,7 +1024,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { break; } case 5: { - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.addDocument(createDocument(101, 4)); w.close(); break; @@ -1090,7 +1090,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { @Override public void setUp() throws Exception { super.setUp(); - indexDir = new File(TEMP_DIR, "IndexReaderReopen"); + indexDir = _TestUtil.getTempDir("IndexReaderReopen"); } public void testCloseOrig() throws Throwable { @@ -1188,7 +1188,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setIndexDeletionPolicy(new KeepAllCommits()). setMaxBufferedDocs(-1). 
setMergePolicy(newLogMergePolicy(10)) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index 89bafd921d1..e6f27030134 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -37,6 +37,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockFixedLengthPayloadFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; @@ -86,7 +87,7 @@ public class TestIndexWriter extends LuceneTestCase { try { IndexWriterConfig.setDefaultWriteLockTimeout(2000); assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout()); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); } finally { IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout); } @@ -110,7 +111,7 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); // optimize the index and check that the new doc count is correct - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); assertEquals(60, writer.numDocs()); writer.optimize(); assertEquals(60, writer.maxDoc()); @@ -125,7 +126,7 @@ public class TestIndexWriter extends LuceneTestCase { // make sure opening a new index for create over // this existing one works correctly: - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); assertEquals(0, writer.maxDoc()); assertEquals(0, writer.numDocs()); writer.close(); @@ -147,13 +148,13 @@ public class TestIndexWriter extends LuceneTestCase { writer.addDocument(doc); } - + public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); - new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).rollback(); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback(); String[] endFiles = dir.listAll(); Arrays.sort(startFiles); @@ -170,13 +171,13 @@ public class TestIndexWriter extends LuceneTestCase { final Document doc = new Document(); doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); - - for(int numDocs=38;numDocs<500;numDocs += 38) { + final int incrMin = TEST_NIGHTLY ? 
15 : 40; + for(int numDocs=10;numDocs<500;numDocs += _TestUtil.nextInt(random, incrMin, 5*incrMin)) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMinMergeDocs(1); ldmp.setMergeFactor(5); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy( ldmp)); for(int j=0;j lastFlushCount); + assertTrue("" + j, flushCount > lastFlushCount); lastFlushCount = flushCount; writer.getConfig().setRAMBufferSizeMB(0.000001); writer.getConfig().setMaxBufferedDeleteTerms(1); @@ -802,8 +826,8 @@ public class TestIndexWriter extends LuceneTestCase { } public void testDiverseDocs() throws IOException { - MockDirectoryWrapper dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.5)); + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); for(int i=0;i<3;i++) { // First, docs where every term is unique (heavy on // Posting instances) @@ -849,12 +873,12 @@ public class TestIndexWriter extends LuceneTestCase { } public void testEnablingNorms() throws IOException { - MockDirectoryWrapper dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(10)); + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, pre flush for(int j=0;j<10;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 8) { f.setOmitNorms(true); } @@ -870,12 +894,12 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(10, hits.length); searcher.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, post flush for(int j=0;j<27;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 26) { f.setOmitNorms(true); } @@ -895,9 +919,9 @@ public class TestIndexWriter extends LuceneTestCase { } public void testHighFreqTerm() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's StringBuilder b = new StringBuilder(1024*1024); for(int i=0;i<4096;i++) { @@ -945,21 +969,21 @@ public class TestIndexWriter extends LuceneTestCase { return myLockFactory.makeLock(name); } } - + Directory dir = new MyRAMDirectory(new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new 
MockAnalyzer(random))); for (int i = 0; i < 100; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("did not get right number of hits", 100, hits.length); searcher.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE)); writer.close(); searcher.close(); @@ -970,7 +994,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(10)) ); @@ -992,7 +1016,7 @@ public class TestIndexWriter extends LuceneTestCase { // empty doc (no norms) and flush public void testEmptyDocAfterFlushingRealDoc() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); @@ -1019,7 +1043,7 @@ public class TestIndexWriter extends LuceneTestCase { for(int pass=0;pass<2;pass++) { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(2). 
setMergePolicy(newLogMergePolicy(101)) @@ -1050,7 +1074,7 @@ public class TestIndexWriter extends LuceneTestCase { infos.read(dir); assertEquals(2, infos.size()); } - } + } dir.close(); } @@ -1065,8 +1089,8 @@ public class TestIndexWriter extends LuceneTestCase { public void testBadSegment() throws IOException { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); - + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document document = new Document(); document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); iw.addDocument(document); @@ -1080,7 +1104,7 @@ public class TestIndexWriter extends LuceneTestCase { try { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); IndexWriter iw = new IndexWriter(dir, conf); @@ -1122,12 +1146,13 @@ public class TestIndexWriter extends LuceneTestCase { public void testSetMaxMergeDocs() throws IOException { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setMaxMergeDocs(20); lmp.setMergeFactor(2); IndexWriter iw = new IndexWriter(dir, conf); + iw.setInfoStream(VERBOSE ? System.out : null); Document document = new Document(); document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); @@ -1146,7 +1171,7 @@ public class TestIndexWriter extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: iter=" + i); } - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); writer.setInfoStream(VERBOSE ? System.out : null); //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); //lmp.setMergeFactor(2); @@ -1183,7 +1208,7 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); if (0 == i % 4) { - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); //lmp2.setUseCompoundFile(false); writer.optimize(); @@ -1207,7 +1232,7 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter( directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(2). 
setMergePolicy(newLogMergePolicy()) @@ -1276,7 +1301,7 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); // Reopen - writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); writer.setInfoStream(VERBOSE ? System.out : null); } writer.close(); @@ -1290,7 +1315,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testUnlimitedMaxFieldLength() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); StringBuilder b = new StringBuilder(); @@ -1314,12 +1339,12 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1346,13 +1371,13 @@ public class TestIndexWriter extends LuceneTestCase { writer.close(); dir.close(); } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH)); writer.setInfoStream(VERBOSE ? System.out : null); @@ -1378,7 +1403,7 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(8, ir.numDocs()); ir.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); assertEquals(8, writer.numDocs()); assertEquals(10, writer.maxDoc()); writer.expungeDeletes(); @@ -1396,7 +1421,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). setMergePolicy(newLogMergePolicy(50)) @@ -1426,7 +1451,7 @@ public class TestIndexWriter extends LuceneTestCase { writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy(3)) ); assertEquals(49, writer.numDocs()); @@ -1445,7 +1470,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
setMaxBufferedDocs(2). setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). setMergePolicy(newLogMergePolicy(50)) @@ -1475,7 +1500,7 @@ public class TestIndexWriter extends LuceneTestCase { writer = new IndexWriter( dir, - newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy(3)) ); writer.expungeDeletes(false); @@ -1490,7 +1515,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1179 public void testEmptyFieldName() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); @@ -1513,18 +1538,18 @@ public class TestIndexWriter extends LuceneTestCase { public void doAfterFlush() { afterWasCalled = true; } - + @Override protected void doBeforeFlush() throws IOException { beforeWasCalled = true; } } - + // LUCENE-1222 public void testDoBeforeAfterFlush() throws IOException { Directory dir = newDirectory(); - MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(newField("field", "a field", Field.Store.YES, Field.Index.ANALYZED)); @@ -1548,7 +1573,7 @@ public class TestIndexWriter extends LuceneTestCase { } - + final String[] utf8Data = new String[] { // unpaired low surrogate "ab\udc17cd", "ab\ufffdcd", @@ -1578,7 +1603,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-510 public void testInvalidUTF16() throws Throwable { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); final int count = utf8Data.length/2; @@ -1618,7 +1643,7 @@ public class TestIndexWriter extends LuceneTestCase { } UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); - + String s1 = new String(chars, 0, len); String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8"); assertEquals("codepoint " + ch, s1, s2); @@ -1675,7 +1700,7 @@ public class TestIndexWriter extends LuceneTestCase { expected[i++] = 0xfffd; expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); hasIllegal = true; - } else + } else expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); } else { expected[i] = buffer[i] = ' '; @@ -1772,10 +1797,10 @@ public class TestIndexWriter extends LuceneTestCase { final TokenStream tokens = new TokenStream() { final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - + final Iterator terms = Arrays.asList("a","b","c").iterator(); boolean first = true; - + @Override public boolean incrementToken() { if (!terms.hasNext()) return false; @@ -1788,7 +1813,7 @@ public class TestIndexWriter extends LuceneTestCase { }; Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(new Field("field", tokens)); w.addDocument(doc); @@ -1827,12 +1852,12 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1883,12 +1908,12 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1909,7 +1934,7 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); reader2.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 17; i++) addDoc(writer); @@ -1937,7 +1962,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testPrepareCommitNoChanges() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.prepareCommit(); writer.commit(); writer.close(); @@ -1951,11 +1976,11 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1219 public void testBinaryFieldOffsetLength() throws IOException { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); byte[] b = new byte[50]; for(int i=0;i<50;i++) b[i] = (byte) (i+77); - + Document doc = new Document(); Field f = new Field("binary", b, 10, 17); byte[] bx = f.getBinaryValue(); @@ -1981,7 +2006,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1382 public void testCommitUserData() throws IOException { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); for(int j=0;j<17;j++) addDoc(w); w.close(); @@ -1992,27 +2017,27 @@ public class TestIndexWriter extends LuceneTestCase { // commit(Map) never called for this index assertEquals(0, r.getCommitUserData().size()); r.close(); - - w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); for(int j=0;j<17;j++) addDoc(w); Map data = new HashMap(); data.put("label", "test1"); w.commit(data); w.close(); - + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); r = IndexReader.open(dir, true); assertEquals("test1", r.getCommitUserData().get("label")); r.close(); - w = new IndexWriter(dir, 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.optimize(); w.close(); assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); - + dir.close(); } @@ -2020,9 +2045,9 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-2529 public void testPositionIncrementGapEmptyField() throws Exception { Directory dir = newDirectory(); - MockAnalyzer analyzer = new MockAnalyzer(); + MockAnalyzer analyzer = new MockAnalyzer(random); analyzer.setPositionIncrementGap( 100 ); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); Field f = newField("field", "", Field.Store.NO, @@ -2049,7 +2074,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1468 -- make sure opening an IndexWriter with // create=true does not remove non-index files - + public void testOtherFiles() throws Throwable { Directory dir = newDirectory(); try { @@ -2058,7 +2083,7 @@ public class TestIndexWriter extends LuceneTestCase { out.writeByte((byte) 42); out.close(); - new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())).close(); + new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); assertTrue(dir.fileExists("myrandomfile")); } finally { @@ -2068,7 +2093,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testDeadlock() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); Document doc = new Document(); doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); @@ -2079,7 +2104,7 @@ public class TestIndexWriter extends LuceneTestCase { // index has 2 segments Directory dir2 = newDirectory(); - IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer2.addDocument(doc); writer2.close(); @@ -2108,7 +2133,7 @@ public class TestIndexWriter extends LuceneTestCase { @Override public void run() { // LUCENE-2239: won't work with NIOFS/MMAP - Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter w = null; while(!finish) { try { @@ -2117,8 +2142,8 @@ public class TestIndexWriter extends LuceneTestCase { if (w != null) { w.close(); } - IndexWriterConfig conf = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2); + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); w = new IndexWriter(dir, conf); Document doc = new Document(); @@ -2184,10 +2209,10 @@ public class TestIndexWriter extends LuceneTestCase { e.printStackTrace(System.out); } } - try { + try { dir.close(); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); } } } @@ -2202,7 +2227,7 @@ public class 
TestIndexWriter extends LuceneTestCase { // interrupt arrives while class loader is trying to // init this class (in servicing a first interrupt): assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); - + // issue 100 interrupts to child thread int i = 0; while(i < 100) { @@ -2223,7 +2248,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testIndexStoreCombos() throws Exception { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); byte[] b = new byte[50]; for(int i=0;i<50;i++) b[i] = (byte) (i+77); @@ -2236,12 +2261,12 @@ public class TestIndexWriter extends LuceneTestCase { doc.add(f); doc.add(f2); w.addDocument(doc); - + // add 2 docs to test in-memory merging f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); w.addDocument(doc); - + // force segment flush so we can force a segment merge with doc3 later. w.commit(); @@ -2264,7 +2289,7 @@ public class TestIndexWriter extends LuceneTestCase { assertTrue(ir.document(0).getFieldable("binary").isBinary()); assertTrue(ir.document(1).getFieldable("binary").isBinary()); assertTrue(ir.document(2).getFieldable("binary").isBinary()); - + assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); assertEquals("value", ir.document(2).get("string")); @@ -2286,7 +2311,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1727: make sure doc fields are stored in order public void testStoredFieldsOrder() throws Throwable { Directory d = newDirectory(); - IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO)); doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO)); @@ -2318,7 +2343,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testEmbeddedFFFF() throws Throwable { Directory d = newDirectory(); - IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(newField("field", "a a\uffffb", Field.Store.NO, Field.Index.ANALYZED)); w.addDocument(doc); @@ -2335,8 +2360,8 @@ public class TestIndexWriter extends LuceneTestCase { public void testNoDocsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); writer.setInfoStream(new PrintStream(bos)); writer.addDocument(new Document()); @@ -2345,7 +2370,7 @@ public class TestIndexWriter extends LuceneTestCase { _TestUtil.checkIndex(dir); dir.close(); } - + // LUCENE-2095: make sure with multiple threads commit // doesn't return until all changes are in fact in the // index @@ 
-2353,8 +2378,8 @@ public class TestIndexWriter extends LuceneTestCase { final int NUM_THREADS = 5; final double RUN_SEC = 0.5; final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(w.w); w.commit(); final AtomicBoolean failed = new AtomicBoolean(); @@ -2526,7 +2551,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testIndexDivisor() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); config.setTermIndexInterval(2); IndexWriter w = new IndexWriter(dir, config); StringBuilder s = new StringBuilder(); @@ -2538,7 +2563,7 @@ public class TestIndexWriter extends LuceneTestCase { Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); d.add(f); w.addDocument(d); - + IndexReader r = w.getReader().getSequentialSubReaders()[0]; TermsEnum t = r.fields().terms("field").iterator(); int count = 0; @@ -2563,7 +2588,7 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter w = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(mergePolicy) ); Document doc = new Document(); @@ -2624,10 +2649,10 @@ public class TestIndexWriter extends LuceneTestCase { // in case a deletion policy which holds onto commits is used. Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(sdp)); - + // First commit Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2637,7 +2662,7 @@ public class TestIndexWriter extends LuceneTestCase { // Keep that commit sdp.snapshot("id"); - + // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. 
doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2649,31 +2674,26 @@ public class TestIndexWriter extends LuceneTestCase { sdp.release("id"); writer.deleteUnusedFiles(); assertEquals(1, IndexReader.listCommits(dir).size()); - + writer.close(); dir.close(); } - - private static class FlushCountingIndexWriter extends IndexWriter { - int flushCount; - public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException { - super(dir, iwc); - } - @Override - public void doAfterFlush() { - flushCount++; - } - } public void testIndexingThenDeleting() throws Exception { final Random r = random; - Directory dir = newDirectory(); - FlushCountingIndexWriter w = new FlushCountingIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, false)).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(-1).setMaxBufferedDeleteTerms(-1)); + // note this test explicitly disables payloads + final Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + } + }; + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED)); - int num = 6 * RANDOM_MULTIPLIER; + int num = TEST_NIGHTLY ? 6 * RANDOM_MULTIPLIER : 3 * RANDOM_MULTIPLIER; for (int iter = 0; iter < num; iter++) { int count = 0; @@ -2683,15 +2703,15 @@ public class TestIndexWriter extends LuceneTestCase { } if (doIndexing) { // Add docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.addDocument(doc); count++; } } else { // Delete docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.deleteDocuments(new Term("foo", ""+count)); count++; } @@ -2701,13 +2721,13 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); dir.close(); } - + public void testNoCommits() throws Exception { // Tests that if we don't call commit(), the directory has 0 commits. This has // changed since LUCENE-2386, where before IW would always commit on a fresh // new index. Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); try { IndexReader.listCommits(dir); fail("listCommits should have thrown an exception over empty index"); @@ -2722,20 +2742,20 @@ public class TestIndexWriter extends LuceneTestCase { public void testEmptyFSDirWithNoLock() throws Exception { // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), - // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed + // then IndexWriter ctor succeeds. 
Previously (LUCENE-2386) it failed // when listAll() was called in IndexFileDeleter. - Directory dir = newFSDirectory(new File(TEMP_DIR, "emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); - new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())).close(); + Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); + new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); dir.close(); } public void testEmptyDirRollback() throws Exception { // Tests that if IW is created over an empty Directory, some documents are - // indexed, flushed (but not committed) and then IW rolls back, then no + // indexed, flushed (but not committed) and then IW rolls back, then no // files are left in the Directory. Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); String[] files = dir.listAll(); @@ -2758,7 +2778,7 @@ public class TestIndexWriter extends LuceneTestCase { writer.addDocument(doc); // Adding just one document does not call flush yet. assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); - + doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); @@ -2779,17 +2799,17 @@ public class TestIndexWriter extends LuceneTestCase { public void testNoSegmentFile() throws IOException { Directory dir = newDirectory(); dir.setLockFactory(NoLockFactory.getNoLockFactory()); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); - + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); w.addDocument(doc); w.addDocument(doc); - IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2) + IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2) .setOpenMode(OpenMode.CREATE)); - + w2.close(); // If we don't do that, the test fails on Windows w.rollback(); @@ -2799,7 +2819,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testFutureCommit() throws Exception { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); Document doc = new Document(); w.addDocument(doc); @@ -2825,10 +2845,10 @@ public class TestIndexWriter extends LuceneTestCase { assertNotNull(commit); - w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new 
MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); assertEquals(1, w.numDocs()); - + // commit IndexWriter to "third" w.addDocument(doc); commitData.put("tag", "third"); @@ -2875,7 +2895,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testRandomStoredFields() throws IOException { Directory dir = newDirectory(); Random rand = random; - RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20))); + RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20))); //w.w.setInfoStream(System.out); //w.w.setUseCompoundFile(false); if (VERBOSE) { @@ -2883,7 +2903,7 @@ public class TestIndexWriter extends LuceneTestCase { } final int docCount = 200*RANDOM_MULTIPLIER; final int fieldCount = _TestUtil.nextInt(rand, 1, 5); - + final List fieldIDs = new ArrayList(); Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -2893,7 +2913,7 @@ public class TestIndexWriter extends LuceneTestCase { } final Map docs = new HashMap(); - + if (VERBOSE) { System.out.println("TEST: build index docCount=" + docCount); } @@ -2971,7 +2991,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testNoUnwantedTVFiles() throws Exception { Directory dir = newDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy())); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy())); ((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false); String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg"; @@ -3080,7 +3100,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer()); - char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8]; + char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; Arrays.fill(chars, 'x'); Document doc = new Document(); final String bigTerm = new String(chars); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java index eee7dca1edf..689a6ad0911 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -24,7 +24,7 @@ import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarityProvider; import org.apache.lucene.search.IndexSearcher; @@ -36,20 +36,20 @@ public class TestIndexWriterConfig extends LuceneTestCase { private static final class MySimilarityProvider extends DefaultSimilarityProvider { // Does not implement anything - used only for type checking on IndexWriterConfig. 
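The testFutureCommit hunk above keeps every commit alive with NoDeletionPolicy and then reopens a writer rooted at an older IndexCommit via setIndexCommit. A compact sketch of that pattern using only the public APIs the hunk exercises; the original test tags commits with user data, while this sketch simply picks the commit with the lowest generation, and the test name is illustrative.

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.NoDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestReopenOnCommitSketch extends LuceneTestCase {
  public void testReopenOnOlderCommit() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    w.addDocument(new Document());
    w.commit();                      // first commit: 1 doc
    w.addDocument(new Document());
    w.commit();                      // second commit: 2 docs
    w.close();

    // NoDeletionPolicy retained both commits; pick the oldest one.
    IndexCommit first = null;
    for (IndexCommit c : IndexReader.listCommits(dir)) {
      if (first == null || c.getGeneration() < first.getGeneration()) {
        first = c;
      }
    }
    assertNotNull(first);

    // Reopen the writer rooted at the first commit; it sees only that commit's docs.
    w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)
        .setIndexCommit(first));
    assertEquals(1, w.numDocs());
    w.close();
    dir.close();
  }
}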
} - + private static final class MyIndexingChain extends IndexingChain { // Does not implement anything - used only for type checking on IndexWriterConfig. @Override - public DocConsumer getChain(DocumentsWriter documentsWriter) { + DocConsumer getChain(DocumentsWriterPerThread documentsWriter) { return null; } - + } @Test public void testDefaults() throws Exception { - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass()); assertNull(conf.getIndexCommit()); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); @@ -64,12 +64,16 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0); assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs()); assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling()); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); assertNull(conf.getMergedSegmentWarmer()); - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor()); - assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); - + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); + assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass()); + assertNull(conf.getFlushPolicy()); + assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB()); + + + // Sanity check - validate that all getters are covered. 
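The testDefaults changes above reflect the DocumentsWriterPerThread world: TieredMergePolicy becomes the default merge policy, ThreadAffinityDocumentsWriterThreadPool the default indexer thread pool, the flush policy stays null until a writer installs one, and a per-thread RAM hard limit replaces the old maxThreadStates default check. A small sketch of touching those knobs, assuming only the getters and setters this test exercises (RAMPerThreadHardLimitMB must be greater than 0 and below 2048, per the testInvalidValues hunks further down):

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.util.LuceneTestCase;

public class TestConfigDefaultsSketch extends LuceneTestCase {
  public void testNewDefaults() throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random));

    // TieredMergePolicy replaced LogByteSizeMergePolicy as the default.
    assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());

    // The flush policy is only resolved once an IndexWriter uses the config.
    assertNull(conf.getFlushPolicy());

    // Per-DWPT RAM ceiling: legal values are 1..2047 MB.
    assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB,
        conf.getRAMPerThreadHardLimitMB());
    conf.setRAMPerThreadHardLimitMB(512);
    assertEquals(512, conf.getRAMPerThreadHardLimitMB());
  }
}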
Set getters = new HashSet(); getters.add("getAnalyzer"); @@ -91,7 +95,11 @@ public class TestIndexWriterConfig extends LuceneTestCase { getters.add("getMergePolicy"); getters.add("getMaxThreadStates"); getters.add("getReaderPooling"); + getters.add("getIndexerThreadPool"); getters.add("getReaderTermsIndexDivisor"); + getters.add("getFlushPolicy"); + getters.add("getRAMPerThreadHardLimitMB"); + for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) { assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName())); @@ -107,12 +115,12 @@ public class TestIndexWriterConfig extends LuceneTestCase { if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) { - assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", IndexWriterConfig.class, m.getReturnType()); } } } - + @Test public void testConstants() throws Exception { // Tests that the values of the constants does not change @@ -123,13 +131,12 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0); assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING); - assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); } - + @Test public void testToString() throws Exception { - String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).toString(); + String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString(); for (Field f : IndexWriterConfig.class.getDeclaredFields()) { int modifiers = f.getModifiers(); if (Modifier.isStatic(modifiers) && Modifier.isFinal(modifiers)) { @@ -143,30 +150,30 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1); } } - + @Test public void testClone() throws Exception { - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); IndexWriterConfig clone = (IndexWriterConfig) conf.clone(); - + // Clone is shallow since not all parameters are cloneable. 
assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy()); - + conf.setMergeScheduler(new SerialMergeScheduler()); assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass()); } @Test public void testInvalidValues() throws Exception { - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); - + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + // Test IndexDeletionPolicy assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(null); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); - + // Test MergeScheduler assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); conf.setMergeScheduler(new SerialMergeScheduler()); @@ -183,12 +190,12 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); // Test IndexingChain - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); conf.setIndexingChain(new MyIndexingChain()); assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass()); conf.setIndexingChain(null); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); - + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); + try { conf.setMaxBufferedDeleteTerms(0); fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); @@ -239,14 +246,22 @@ public class TestIndexWriterConfig extends LuceneTestCase { // this is expected } - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - conf.setMaxThreadStates(5); - assertEquals(5, conf.getMaxThreadStates()); - conf.setMaxThreadStates(0); - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); + try { + conf.setRAMPerThreadHardLimitMB(2048); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); + } catch (IllegalArgumentException e) { + // this is expected + } + + try { + conf.setRAMPerThreadHardLimitMB(0); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); + } catch (IllegalArgumentException e) { + // this is expected + } // Test MergePolicy - assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(new LogDocMergePolicy()); assertEquals(LogDocMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(null); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index 0acc750376a..fcc3508c5d7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -33,7 +33,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; public class TestIndexWriterDelete extends LuceneTestCase { - + // test the simple case public void testSimpleCase() throws IOException { String[] keywords = { "1", "2" }; @@ -44,7 +44,7 @@ public 
class TestIndexWriterDelete extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); for (int i = 0; i < keywords.length; i++) { Document doc = new Document(); @@ -79,7 +79,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); modifier.setInfoStream(VERBOSE ? System.out : null); int id = 0; @@ -113,7 +113,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testMaxBufferedDeletes() throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); writer.setInfoStream(VERBOSE ? System.out : null); writer.addDocument(new Document()); @@ -124,7 +124,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { writer.close(); dir.close(); } - + // test when delete terms only apply to ram segments public void testRAMDeletes() throws IOException { for(int t=0;t<2;t++) { @@ -133,7 +133,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { } Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(4) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(4) .setMaxBufferedDeleteTerms(4)); modifier.setInfoStream(VERBOSE ? 
System.out : null); int id = 0; @@ -172,7 +172,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testBothDeletes() throws IOException { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(100) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(100) .setMaxBufferedDeleteTerms(100)); int id = 0; @@ -206,7 +206,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testBatchDeletes() throws IOException { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -220,7 +220,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { IndexReader reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + id = 0; modifier.deleteDocuments(new Term("id", String.valueOf(++id))); modifier.deleteDocuments(new Term("id", String.valueOf(++id))); @@ -249,7 +249,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testDeleteAll() throws IOException { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -295,35 +295,35 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testDeleteAllRollback() throws IOException { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); - + int id = 0; int value = 100; - + for (int i = 0; i < 7; i++) { addDoc(modifier, ++id, value); } modifier.commit(); - + addDoc(modifier, ++id, value); IndexReader reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + // Delete all - modifier.deleteAll(); + modifier.deleteAll(); // Roll it back modifier.rollback(); modifier.close(); - + // Validate that the docs are still there reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + dir.close(); } @@ -332,12 +332,12 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testDeleteAllNRT() throws IOException { Directory dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); - + int id = 0; int value = 100; - + for (int i = 0; i < 7; i++) { addDoc(modifier, ++id, value); } @@ -349,24 +349,24 @@ public class TestIndexWriterDelete extends LuceneTestCase { addDoc(modifier, ++id, value); addDoc(modifier, ++id, value); - + // Delete all - modifier.deleteAll(); + 
modifier.deleteAll(); reader = modifier.getReader(); assertEquals(0, reader.numDocs()); reader.close(); - + // Roll it back modifier.rollback(); modifier.close(); - + // Validate that the docs are still there reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + dir.close(); } @@ -423,7 +423,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { MockDirectoryWrapper startDir = newDirectory(); // TODO: find the resource leak that only occurs sometimes here. startDir.setNoDeleteOpenFile(false); - IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); for (int i = 0; i < 157; i++) { Document d = new Document(); d.add(newField("id", Integer.toString(i), Field.Store.YES, @@ -450,7 +450,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { dir.setPreventDoubleWrite(false); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)) + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) .setMaxBufferedDocs(1000) .setMaxBufferedDeleteTerms(1000) .setMergeScheduler(new ConcurrentMergeScheduler())); @@ -536,10 +536,15 @@ public class TestIndexWriterDelete extends LuceneTestCase { fail(testName + " hit IOException after disk space was freed up"); } } - + // prevent throwing a random exception here!! + final double randomIOExceptionRate = dir.getRandomIOExceptionRate(); + final long maxSizeInBytes = dir.getMaxSizeInBytes(); + dir.setRandomIOExceptionRate(0.0); + dir.setMaxSizeInBytes(0); if (!success) { // Must force the close else the writer can have // open files which cause exc in MockRAMDir.close + modifier.rollback(); } @@ -549,6 +554,8 @@ public class TestIndexWriterDelete extends LuceneTestCase { _TestUtil.checkIndex(dir); TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close"); } + dir.setRandomIOExceptionRate(randomIOExceptionRate); + dir.setMaxSizeInBytes(maxSizeInBytes); // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if @@ -619,7 +626,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { // This test tests that buffered deletes are cleared when // an Exception is hit during flush. public void testErrorAfterApplyDeletes() throws IOException { - + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { boolean sawMaybe = false; boolean failed = false; @@ -682,7 +689,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { MockDirectoryWrapper dir = newDirectory(); IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2).setReaderPooling(false).setMergePolicy(newLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2).setReaderPooling(false).setMergePolicy(newLogMergePolicy())); modifier.setInfoStream(VERBOSE ? 
System.out : null); LogMergePolicy lmp = (LogMergePolicy) modifier.getConfig().getMergePolicy(); @@ -783,7 +790,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { // a segment is written are cleaned up if there's an i/o error public void testErrorInDocsWriterAdd() throws IOException { - + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { boolean failed = false; @Override @@ -809,7 +816,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { String[] text = { "Amsterdam", "Venice" }; MockDirectoryWrapper dir = newDirectory(); - IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); modifier.commit(); dir.failOn(failure.reset()); @@ -837,7 +844,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { public void testDeleteNullQuery() throws IOException { Directory dir = newDirectory(); - IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); for (int i = 0; i < 5; i++) { addDoc(modifier, i, 2*i); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index 88bcd50fbf3..4769319accf 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -155,7 +155,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { } MockDirectoryWrapper dir = newDirectory(); - MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setRAMBufferSizeMB(0.1).setMergeScheduler(new ConcurrentMergeScheduler())); ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); //writer.setMaxBufferedDocs(10); @@ -201,7 +201,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { public void testRandomExceptionsThreads() throws Throwable { MockDirectoryWrapper dir = newDirectory(); - MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setRAMBufferSizeMB(0.2).setMergeScheduler(new ConcurrentMergeScheduler())); ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); //writer.setMaxBufferedDocs(10); @@ -223,8 +223,9 @@ public class TestIndexWriterExceptions extends LuceneTestCase { threads[i].join(); for(int i=0;i thrown = new ArrayList(); final Directory dir = newDirectory(); final IndexWriter writer = new IndexWriter(dir, - newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())) { + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))) { @Override public void message(final String message) { if (message.startsWith("now flush at close") && 0 == thrown.size()) { @@ -930,7 +931,7 @@ public class TestIndexWriterExceptions extends 
LuceneTestCase { writer.close(); dir.close(); } - + // LUCENE-1347 private static final class MockIndexWriter4 extends IndexWriter { @@ -947,11 +948,11 @@ public class TestIndexWriterExceptions extends LuceneTestCase { return true; } } - + // LUCENE-1347 public void testRollbackExceptionHang() throws Throwable { Directory dir = newDirectory(); - MockIndexWriter4 w = new MockIndexWriter4(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + MockIndexWriter4 w = new MockIndexWriter4(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(w); w.doFail = true; @@ -961,19 +962,19 @@ public class TestIndexWriterExceptions extends LuceneTestCase { } catch (RuntimeException re) { // expected } - + w.doFail = false; w.rollback(); dir.close(); } - + // LUCENE-1044: Simulate checksum error in segments_N public void testSegmentsChecksumError() throws IOException { Directory dir = newDirectory(); IndexWriter writer = null; - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 100 documents for (int i = 0; i < 100; i++) { @@ -1005,7 +1006,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { reader.close(); dir.close(); } - + // Simulate a corrupt index by removing last byte of // latest segments file and make sure we get an // IOException trying to open the index: @@ -1015,7 +1016,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { IndexWriter writer = null; - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 100 documents for (int i = 0; i < 100; i++) { @@ -1053,7 +1054,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { } dir.close(); } - + // Simulate a corrupt index by removing one of the cfs // files and make sure we get an IOException trying to // open the index: @@ -1064,7 +1065,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
setMergePolicy(newLogMergePolicy(true)) ); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setNoCFSRatio(1.0); @@ -1102,7 +1103,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { } dir.close(); } - + // Simulate a writer that crashed while writing segments // file: make sure we can still open the index (ie, // gracefully fallback to the previous segments file), @@ -1113,7 +1114,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { IndexWriter writer = null; - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 100 documents for (int i = 0; i < 100; i++) { @@ -1151,7 +1152,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { reader.close(); try { - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); } catch (Exception e) { e.printStackTrace(System.out); fail("writer failed to open on a crashed index"); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterLockRelease.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterLockRelease.java index 1f13b9b48a4..58eea6b9609 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterLockRelease.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterLockRelease.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -39,7 +40,7 @@ public class TestIndexWriterLockRelease extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); if (this.__test_dir == null) { - this.__test_dir = new File(TEMP_DIR, "testIndexWriter"); + this.__test_dir = _TestUtil.getTempDir("testIndexWriter"); if (this.__test_dir.exists()) { throw new IOException("test directory \"" + this.__test_dir.getPath() + "\" already exists (please remove by hand)"); @@ -75,10 +76,10 @@ public class TestIndexWriterLockRelease extends LuceneTestCase { public void testIndexWriterLockRelease() throws IOException { Directory dir = newFSDirectory(this.__test_dir); try { - new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); } catch (FileNotFoundException e) { try { - new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); } catch (FileNotFoundException e1) { } } finally { diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java index 8b05149e3bd..1a426d9330a 100755 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java @@ -34,7 +34,7 @@ public class TestIndexWriterMergePolicy 
extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(10).setMergePolicy(new LogDocMergePolicy())); for (int i = 0; i < 100; i++) { @@ -51,7 +51,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(10).setMergePolicy(new LogDocMergePolicy())); boolean noOverMerge = false; @@ -76,7 +76,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { mp.setMinMergeDocs(100); mp.setMergeFactor(10); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(10).setMergePolicy(mp)); for (int i = 0; i < 100; i++) { @@ -86,7 +86,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { mp = new LogDocMergePolicy(); mp.setMergeFactor(10); writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(mp)); mp.setMinMergeDocs(100); checkInvariants(writer); @@ -102,9 +102,9 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(10). - setMergePolicy(newInOrderLogMergePolicy()) + setMergePolicy(newLogMergePolicy()) ); for (int i = 0; i < 250; i++) { @@ -130,8 +130,9 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMaxBufferedDocs(101).setMergePolicy(new LogDocMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(101).setMergePolicy(new LogDocMergePolicy()) + .setMergeScheduler(new SerialMergeScheduler())); // leftmost* segment has 1 doc // rightmost* segment has 100 docs @@ -143,17 +144,17 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { writer.close(); writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( - OpenMode.APPEND).setMaxBufferedDocs(101).setMergePolicy( - new LogDocMergePolicy())); + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(101).setMergePolicy(new LogDocMergePolicy()) + .setMergeScheduler(new SerialMergeScheduler())); } writer.close(); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(10); writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( - OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(ldmp).setMergeScheduler(new ConcurrentMergeScheduler())); + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(ldmp).setMergeScheduler(new SerialMergeScheduler())); // merge policy only fixes segments on levels where merges // have been triggered, so check invariants after all adds @@ -181,7 +182,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { LogDocMergePolicy ldmp = 
new LogDocMergePolicy(); ldmp.setMergeFactor(100); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(10).setMergePolicy(ldmp)); for (int i = 0; i < 250; i++) { @@ -197,7 +198,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(5); writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( + new MockAnalyzer(random)).setOpenMode( OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(ldmp).setMergeScheduler(new ConcurrentMergeScheduler())); // merge factor is changed, so check invariants after all adds @@ -236,13 +237,13 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { int segmentCount = writer.getSegmentCount(); for (int i = segmentCount - 1; i >= 0; i--) { int docCount = writer.getDocCount(i); - assertTrue("docCount=" + docCount + " lowerBound=" + lowerBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.segString(), docCount > lowerBound); + assertTrue("docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.segString() + " config=" + writer.getConfig(), docCount > lowerBound); if (docCount <= upperBound) { numSegments++; } else { if (upperBound * mergeFactor <= maxMergeDocs) { - assertTrue("maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.segString(), numSegments < mergeFactor); + assertTrue("maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.segString() + " config=" + writer.getConfig(), numSegments < mergeFactor); } do { diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java index 87883d635a1..a454ff44446 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -57,8 +57,8 @@ public class TestIndexWriterMerging extends LuceneTestCase IndexWriter writer = new IndexWriter( merged, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). - setMergePolicy(newInOrderLogMergePolicy(2)) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(2)) ); writer.setInfoStream(VERBOSE ? System.out : null); writer.addIndexes(indexA, indexB); @@ -98,10 +98,10 @@ public class TestIndexWriterMerging extends LuceneTestCase IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(2). 
- setMergePolicy(newInOrderLogMergePolicy(2)) + setMergePolicy(newLogMergePolicy(2)) ); for (int i = start; i < (start + numDocs); i++) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java index 929bebc145f..5ecf258c2ca 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java @@ -58,7 +58,7 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase { } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory()); dir.setMaxSizeInBytes(diskFree); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.setInfoStream(VERBOSE ? System.out : null); MergeScheduler ms = writer.getConfig().getMergeScheduler(); if (ms instanceof ConcurrentMergeScheduler) { @@ -120,7 +120,7 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase { dir.close(); // Now try again w/ more space: - diskFree += _TestUtil.nextInt(random, 400, 600); + diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 400, 600) : _TestUtil.nextInt(random, 3000, 5000); } else { //_TestUtil.syncConcurrentMerges(writer); dir.setMaxSizeInBytes(0); @@ -152,7 +152,7 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase { long inputDiskUsage = 0; for(int i=0;i 0); reader.close(); - + dir.close(); } } @@ -222,7 +222,7 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { IndexWriter writer = new IndexWriter( dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergeScheduler(new ConcurrentMergeScheduler()). 
setMergePolicy(newLogMergePolicy(4)) @@ -277,7 +277,7 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { public void _testSingleThreadFailure(MockDirectoryWrapper.Failure failure) throws IOException { MockDirectoryWrapper dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergeScheduler(new ConcurrentMergeScheduler())); final Document doc = new Document(); doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); @@ -314,7 +314,7 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { boolean sawClose = false; for (int i = 0; i < trace.length; i++) { if ("abort".equals(trace[i].getMethodName()) || - "flushDocument".equals(trace[i].getMethodName())) { + "finishDocument".equals(trace[i].getMethodName())) { sawAbortOrFlushDoc = true; } if ("close".equals(trace[i].getMethodName())) { diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java index 13b668417c9..da5100aaa3b 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java +++ b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.LuceneTestCase; /** - * Test demonstrating EOF bug on the last field of the last doc + * Test demonstrating EOF bug on the last field of the last doc * if other docs have allready been accessed. */ public class TestLazyBug extends LuceneTestCase { @@ -47,9 +47,9 @@ public class TestLazyBug extends LuceneTestCase { }; private static Set dataset = asSet(data); - + private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3); - + private static FieldSelector SELECTOR = new FieldSelector() { public FieldSelectorResult accept(String f) { if (f.equals(MAGIC_FIELD)) { @@ -58,22 +58,21 @@ public class TestLazyBug extends LuceneTestCase { return FieldSelectorResult.LAZY_LOAD; } }; - - private Directory makeIndex() throws Exception { + + private Directory makeIndex() throws Exception { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); - for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++ ) { - doc.add(newField("f"+f, - data[f % data.length] - + '#' + data[random.nextInt(data.length)], - Field.Store.YES, + doc.add(newField("f"+f, + data[f % data.length] + + '#' + data[random.nextInt(data.length)], + Field.Store.YES, Field.Index.ANALYZED)); } writer.addDocument(doc); @@ -84,14 +83,14 @@ public class TestLazyBug extends LuceneTestCase { } return dir; } - + public void doTest(int[] docs) throws Exception { Directory dir = makeIndex(); IndexReader reader = IndexReader.open(dir, true); for (int i = 0; i < docs.length; i++) { Document d = reader.document(docs[i], SELECTOR); d.get(MAGIC_FIELD); - + List fields = d.getFields(); for (Iterator fi = fields.iterator(); fi.hasNext(); ) { Fieldable f=null; @@ -101,7 +100,7 @@ public class TestLazyBug extends LuceneTestCase { String fval = 
f.stringValue(); assertNotNull(docs[i]+" FIELD: "+fname, fval); String[] vals = fval.split("#"); - if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { + if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { fail("FIELD:"+fname+",VAL:"+fval); } } catch (Exception e) { @@ -116,7 +115,7 @@ public class TestLazyBug extends LuceneTestCase { public void testLazyWorks() throws Exception { doTest(new int[] { 399 }); } - + public void testLazyAlsoWorks() throws Exception { doTest(new int[] { 399, 150 }); } diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java index 8bd31b4a73c..331430e435c 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java +++ b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java @@ -18,9 +18,12 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.io.Reader; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.document.Field; @@ -67,10 +70,17 @@ public class TestLazyProxSkipping extends LuceneTestCase { private void createIndex(int numHits) throws IOException { int numDocs = 500; + final Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + } + }; Directory directory = new SeekCountingDirectory(new RAMDirectory()); + // note: test explicitly disables payloads IndexWriter writer = new IndexWriter( directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, false)). + newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). setMaxBufferedDocs(10). setMergePolicy(newLogMergePolicy(false)) ); @@ -133,7 +143,7 @@ public class TestLazyProxSkipping extends LuceneTestCase { public void testSeek() throws IOException { Directory directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java new file mode 100644 index 00000000000..fbe6fa1e0f4 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java @@ -0,0 +1,251 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util._TestUtil; + +public class TestLongPostings extends LuceneTestCase { + + // Produces a realistic unicode random string that + // survives MockAnalyzer unchanged: + private String getRandomTerm(String other) throws IOException { + Analyzer a = new MockAnalyzer(random); + while(true) { + String s = _TestUtil.randomRealisticUnicodeString(random); + if (other != null && s.equals(other)) { + continue; + } + final TokenStream ts = a.tokenStream("foo", new StringReader(s)); + final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); + final BytesRef termBytes = termAtt.getBytesRef(); + int count = 0; + while(ts.incrementToken()) { + termAtt.fillBytesRef(); + if (count == 0 && !termBytes.utf8ToString().equals(s)) { + break; + } + count++; + } + if (count == 1) { + return s; + } + } + } + + public void testLongPostings() throws Exception { + assumeFalse("Too slow with SimpleText codec", CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText")); + + // Don't use _TestUtil.getTempDir so that we own the + // randomness (ie same seed will point to same dir): + Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong())); + + final int NUM_DOCS = (int) ((TEST_NIGHTLY ? 
4e6 : (RANDOM_MULTIPLIER*2e4)) * (1+random.nextDouble())); + + if (VERBOSE) { + System.out.println("TEST: NUM_DOCS=" + NUM_DOCS); + } + + final String s1 = getRandomTerm(null); + final String s2 = getRandomTerm(s1); + + if (VERBOSE) { + System.out.println("\nTEST: s1=" + s1 + " s2=" + s2); + /* + for(int idx=0;idx 0); + assertTrue(r.docFreq(new Term("field", s2)) > 0); + + for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) { + + final String term; + final boolean doS1; + if (random.nextBoolean()) { + term = s1; + doS1 = true; + } else { + term = s2; + doS1 = false; + } + + if (VERBOSE) { + System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1); + } + + final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(r, null, "field", new BytesRef(term)); + + int docID = -1; + while(docID < DocsEnum.NO_MORE_DOCS) { + final int what = random.nextInt(3); + if (what == 0) { + if (VERBOSE) { + System.out.println("TEST: docID=" + docID + "; do next()"); + } + // nextDoc + int expected = docID+1; + while(true) { + if (expected == NUM_DOCS) { + expected = Integer.MAX_VALUE; + break; + } else if (isS1.get(expected) == doS1) { + break; + } else { + expected++; + } + } + docID = postings.nextDoc(); + if (VERBOSE) { + System.out.println(" got docID=" + docID); + } + assertEquals(expected, docID); + if (docID == DocsEnum.NO_MORE_DOCS) { + break; + } + + if (random.nextInt(6) == 3) { + final int freq = postings.freq(); + assertTrue(freq >=1 && freq <= 4); + for(int pos=0;pos 5) { - lmp.setMergeFactor(5); - } - */ + _TestUtil.reduceOpenFiles(writer); final int NUM_INDEX_THREADS = 2; final int NUM_SEARCH_THREADS = 3; @@ -147,36 +136,36 @@ public class TestNRTThreads extends LuceneTestCase { } if (random.nextBoolean()) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("docid")); } writer.addDocument(doc); } else { // we use update but it never replaces a // prior doc if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("docid")); } - writer.updateDocument(new Term("id", doc.get("id")), doc); + writer.updateDocument(new Term("docid", doc.get("docid")), doc); } if (random.nextInt(5) == 3) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid")); } - toDeleteIDs.add(doc.get("id")); + toDeleteIDs.add(doc.get("docid")); } if (random.nextInt(50) == 17) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes"); + //System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes"); } for(String id : toDeleteIDs) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); + //System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); } - writer.deleteDocuments(new Term("id", id)); + writer.deleteDocuments(new Term("docid", id)); } final int count = delCount.addAndGet(toDeleteIDs.size()); if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes"); + //System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes"); } delIDs.addAll(toDeleteIDs); toDeleteIDs.clear(); @@ 
-357,18 +346,18 @@ public class TestNRTThreads extends LuceneTestCase { final IndexSearcher s = newSearcher(r2); boolean doFail = false; for(String id : delIDs) { - final TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1); + final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1); if (hits.totalHits != 0) { System.out.println("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc); doFail = true; } } - final int endID = Integer.parseInt(docs.nextDoc().get("id")); + final int endID = Integer.parseInt(docs.nextDoc().get("docid")); for(int id=0;id processors = new HashMap(); processors.put(dir, new PerTermPayloadProcessor()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors)); writer.optimize(); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java index 5841c79fc10..c5d7a3a1eaf 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java @@ -164,7 +164,7 @@ public class TestPayloads extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) .setOpenMode(OpenMode.CREATE) - .setMergePolicy(newInOrderLogMergePolicy())); + .setMergePolicy(newLogMergePolicy())); // should be in sync with value in TermInfosWriter final int skipInterval = 16; @@ -479,7 +479,7 @@ public class TestPayloads extends LuceneTestCase { Directory dir = newDirectory(); final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); final String field = "test"; Thread[] ingesters = new Thread[numThreads]; @@ -600,16 +600,16 @@ public class TestPayloads extends LuceneTestCase { public void testAcrossFields() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - new MockAnalyzer(MockTokenizer.WHITESPACE, true, true)); + new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); Document doc = new Document(); - doc.add(new Field("haspayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("hasMaybepayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); writer = new RandomIndexWriter(random, dir, - new MockAnalyzer(MockTokenizer.WHITESPACE, true, false)); + new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); doc = new Document(); - doc.add(new Field("nopayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("hasMaybepayload2", "here we go", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.addDocument(doc); writer.optimize(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java index d3164fc2fa5..680bb87221c 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java @@ -93,7 +93,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { 
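The TestPayloads hunks here, like the TestLazyProxSkipping change earlier, show the other side of the MockAnalyzer rework: the old payloads boolean is gone from its constructor, so the seeded analyzer may or may not inject payloads, and a test that must guarantee payload-free tokens wraps MockTokenizer in a throwaway Analyzer exactly as the patch does. A sketch of both forms (class and method names are illustrative):

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;

public class AnalyzerChoiceSketch {
  // Seeded MockAnalyzer: whitespace tokenization, no lowercasing; payload
  // behavior is whatever the test framework decides for this seed.
  static Analyzer seeded(java.util.Random random) {
    return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
  }

  // Explicitly payload-free: wrap MockTokenizer directly, matching the
  // "note: test explicitly disables payloads" hunks in this patch.
  static Analyzer noPayloads() {
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
      }
    };
  }
}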
Directory dir = newDirectory(); CodecProvider provider = new MockCodecProvider(); IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider( + new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setCodecProvider( provider); IndexWriter writer = newWriter(dir, iwconf); addDocs(writer, 10); @@ -121,7 +121,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { System.out.println("TEST: make new index"); } IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider(provider); + new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setCodecProvider(provider); iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); IndexWriter writer = newWriter(dir, iwconf); @@ -141,7 +141,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { assertCodecPerField(_TestUtil.checkIndex(dir, provider), "content", provider.lookup("MockSep")); - iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND).setCodecProvider(provider); //((LogMergePolicy) iwconf.getMergePolicy()).setUseCompoundFile(false); //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); @@ -288,7 +288,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { provider.setFieldCodec("" + j, codecs[random.nextInt(codecs.length)].name); } IndexWriterConfig config = newIndexWriterConfig(random, - TEST_VERSION_CURRENT, new MockAnalyzer()); + TEST_VERSION_CURRENT, new MockAnalyzer(random)); config.setOpenMode(OpenMode.CREATE_OR_APPEND); config.setCodecProvider(provider); IndexWriter writer = newWriter(dir, config); diff --git a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java index c7312b45ab8..7110d1a4c35 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java @@ -32,14 +32,13 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.Version; public class TestPerSegmentDeletes extends LuceneTestCase { public void testDeletes1() throws Exception { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(random.nextLong()), new RAMDirectory()); - IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, - new MockAnalyzer()); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setMaxBufferedDocs(5000); iwc.setRAMBufferSizeMB(100); @@ -66,22 +65,22 @@ public class TestPerSegmentDeletes extends LuceneTestCase { writer.addDocument(TestIndexWriterReader.createDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } - + writer.deleteDocuments(new Term("id", "1")); - + writer.deleteDocuments(new Term("id", "11")); - // flushing without applying deletes means + // flushing without applying deletes means // there will still be deletes in the segment infos writer.flush(false, false); assertTrue(writer.bufferedDeletesStream.any()); - + // get reader flushes pending deletes 
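A pattern that repeats through nearly every hunk in this patch is the analyzer change: new MockAnalyzer() becomes new MockAnalyzer(random), so tokenization choices are tied to the test's random seed, and tests use TEST_VERSION_CURRENT rather than Version.LUCENE_CURRENT. The sketch below shows only the new form; it is illustrative and leans on the LuceneTestCase helpers (random, TEST_VERSION_CURRENT, newDirectory, newIndexWriterConfig) that the touched tests already rely on.

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class MockAnalyzerSeedSketch extends LuceneTestCase {
  public void testAnalyzerTakesRandom() throws Exception {
    Directory dir = newDirectory();
    // Old form (removed by this patch): new MockAnalyzer()
    // New form: the per-test Random is passed explicitly, so a failing seed
    // reproduces the same analysis behavior.
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    w.close();
    dir.close();
  }
}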
// so there should not be anymore IndexReader r1 = writer.getReader(); assertFalse(writer.bufferedDeletesStream.any()); r1.close(); - + // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 @@ -91,16 +90,16 @@ public class TestPerSegmentDeletes extends LuceneTestCase { fsmp.start = 0; fsmp.length = 2; writer.maybeMerge(); - + assertEquals(2, writer.segmentInfos.size()); - + // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.getReader(); int[] id2docs = toDocsArray(new Term("id", "2"), null, r2); assertTrue(id2docs == null); r2.close(); - + /** // added docs are in the ram buffer for (int x = 15; x < 20; x++) { @@ -110,43 +109,43 @@ public class TestPerSegmentDeletes extends LuceneTestCase { assertTrue(writer.numRamDocs() > 0); // delete from the ram buffer writer.deleteDocuments(new Term("id", Integer.toString(13))); - + Term id3 = new Term("id", Integer.toString(3)); - + // delete from the 1st segment writer.deleteDocuments(id3); - + assertTrue(writer.numRamDocs() > 0); - + //System.out // .println("segdels1:" + writer.docWriter.deletesToString()); - + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); - + // we cause a merge to happen fsmp.doMerge = true; fsmp.start = 0; fsmp.length = 2; System.out.println("maybeMerge "+writer.segmentInfos); - + SegmentInfo info0 = writer.segmentInfos.get(0); SegmentInfo info1 = writer.segmentInfos.get(1); - + writer.maybeMerge(); System.out.println("maybeMerge after "+writer.segmentInfos); // there should be docs in RAM assertTrue(writer.numRamDocs() > 0); - + // assert we've merged the 1 and 2 segments // and still have a segment leftover == 2 assertEquals(2, writer.segmentInfos.size()); assertFalse(segThere(info0, writer.segmentInfos)); assertFalse(segThere(info1, writer.segmentInfos)); - + //System.out.println("segdels2:" + writer.docWriter.deletesToString()); - + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); - + IndexReader r = writer.getReader(); IndexReader r1 = r.getSequentialSubReaders()[0]; printDelDocs(r1.getDeletedDocs()); @@ -155,7 +154,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { // there shouldn't be any docs for id:3 assertTrue(docs == null); r.close(); - + part2(writer, fsmp); **/ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); @@ -163,7 +162,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { writer.close(); dir.close(); } - + /** static boolean hasPendingDeletes(SegmentInfos infos) { for (SegmentInfo info : infos) { @@ -185,42 +184,42 @@ public class TestPerSegmentDeletes extends LuceneTestCase { //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.flush(false, false); - + //System.out.println("infos3:"+writer.segmentInfos); - + Term delterm = new Term("id", "8"); writer.deleteDocuments(delterm); //System.out.println("segdels3:" + writer.docWriter.deletesToString()); - + fsmp.doMerge = true; fsmp.start = 1; fsmp.length = 2; writer.maybeMerge(); - - // deletes for info1, the newly created segment from the + + // deletes for info1, the newly created segment from the // merge should have no deletes because they were applied in // the merge //SegmentInfo info1 = writer.segmentInfos.get(1); //assertFalse(exists(info1, writer.docWriter.segmentDeletes)); - + //System.out.println("infos4:"+writer.segmentInfos); //System.out.println("segdels4:" + writer.docWriter.deletesToString()); } - + boolean segThere(SegmentInfo info, 
SegmentInfos infos) { for (SegmentInfo si : infos) { - if (si.name.equals(info.name)) return true; + if (si.name.equals(info.name)) return true; } return false; } - + public static void printDelDocs(Bits bits) { if (bits == null) return; for (int x = 0; x < bits.length(); x++) { System.out.println(x + ":" + bits.get(x)); } } - + public static int[] toDocsArray(Term term, Bits bits, IndexReader reader) throws IOException { Fields fields = MultiFields.getFields(reader); @@ -233,7 +232,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { } return null; } - + public static int[] toArray(DocsEnum docsEnum) throws IOException { List docs = new ArrayList(); while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) { @@ -242,21 +241,21 @@ public class TestPerSegmentDeletes extends LuceneTestCase { } return ArrayUtil.toIntArray(docs); } - + public class RangeMergePolicy extends MergePolicy { boolean doMerge = false; int start; int length; - + private final boolean useCompoundFile; - + private RangeMergePolicy(boolean useCompoundFile) { this.useCompoundFile = useCompoundFile; } - + @Override public void close() {} - + @Override public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { @@ -273,20 +272,20 @@ public class TestPerSegmentDeletes extends LuceneTestCase { } return null; } - + @Override public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) throws CorruptIndexException, IOException { return null; } - + @Override public MergeSpecification findMergesToExpungeDeletes( SegmentInfos segmentInfos) throws CorruptIndexException, IOException { return null; } - + @Override public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) { return useCompoundFile; diff --git a/lucene/src/test/org/apache/lucene/index/TestRollback.java b/lucene/src/test/org/apache/lucene/index/TestRollback.java index c1a0d1fb4f2..f80026d574c 100644 --- a/lucene/src/test/org/apache/lucene/index/TestRollback.java +++ b/lucene/src/test/org/apache/lucene/index/TestRollback.java @@ -38,7 +38,7 @@ public class TestRollback extends LuceneTestCase { rw.close(); // If buffer size is small enough to cause a flush, errors ensue... 
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2).setOpenMode(IndexWriterConfig.OpenMode.APPEND)); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setOpenMode(IndexWriterConfig.OpenMode.APPEND)); Term pkTerm = new Term("pk", ""); for (int i = 0; i < 3; i++) { diff --git a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java index c38fd2d4b2c..46689a202c7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java +++ b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; +import org.apache.lucene.document.Field.Index; import org.apache.lucene.store.*; import org.apache.lucene.util.*; import org.junit.Test; @@ -34,7 +35,7 @@ public class TestRollingUpdates extends LuceneTestCase { final LineFileDocs docs = new LineFileDocs(random); - final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); final int SIZE = 200 * RANDOM_MULTIPLIER; int id = 0; IndexReader r = null; @@ -47,8 +48,8 @@ public class TestRollingUpdates extends LuceneTestCase { } else { id++; } - doc.getField("id").setValue(myID); - w.updateDocument(new Term("id", myID), doc); + doc.getField("docid").setValue(myID); + w.updateDocument(new Term("docid", myID), doc); if (docIter >= SIZE && random.nextInt(50) == 17) { if (r != null) { @@ -72,4 +73,72 @@ public class TestRollingUpdates extends LuceneTestCase { dir.close(); } + + + public void testUpdateSameDoc() throws Exception { + final Directory dir = newDirectory(); + + final LineFileDocs docs = new LineFileDocs(random); + for (int r = 0; r < 3; r++) { + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + final int SIZE = 200 * RANDOM_MULTIPLIER; + final int numUpdates = (int) (SIZE * (2 + random.nextDouble())); + int numThreads = 3 + random.nextInt(12); + IndexingThread[] threads = new IndexingThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + threads[i] = new IndexingThread(docs, w, numUpdates); + threads[i].start(); + } + + for (int i = 0; i < numThreads; i++) { + threads[i].join(); + } + + w.close(); + } + IndexReader open = IndexReader.open(dir); + assertEquals(1, open.numDocs()); + open.close(); + docs.close(); + dir.close(); + } + + static class IndexingThread extends Thread { + final LineFileDocs docs; + final IndexWriter writer; + final int num; + + public IndexingThread(LineFileDocs docs, IndexWriter writer, int num) { + super(); + this.docs = docs; + this.writer = writer; + this.num = num; + } + + public void run() { + try { + IndexReader open = null; + for (int i = 0; i < num; i++) { + Document doc = new Document();// docs.nextDoc(); + doc.add(newField("id", "test", Index.NOT_ANALYZED)); + writer.updateDocument(new Term("id", "test"), doc); + if (random.nextInt(10) == 0) { + if (open == null) + open = IndexReader.open(writer, true); + IndexReader reader = open.reopen(); + if (reader != open) { + open.close(); + open = reader; + } + assertEquals("iter: " + i + " numDocs: "+ open.numDocs() + " del: " + 
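TestRollingUpdates switches its primary-key field from "id" to "docid", and the new testUpdateSameDoc/IndexingThread hammer updateDocument() with one constant term from several threads, asserting that exactly one live document remains. A single-threaded sketch of that invariant follows (not part of the patch; directory choice and field names are placeholders).

import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class UpdateSameDocSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_CURRENT, new MockAnalyzer(new Random())));
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      doc.add(new Field("id", "test", Field.Store.NO, Field.Index.NOT_ANALYZED));
      // updateDocument() deletes any existing doc matching the term, then adds
      // the new one, so at most one live "id:test" doc ever exists.
      w.updateDocument(new Term("id", "test"), doc);
    }
    w.close();
    IndexReader r = IndexReader.open(dir);
    System.out.println("numDocs=" + r.numDocs()); // 1, as the new testUpdateSameDoc asserts
    r.close();
    dir.close();
  }
}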
open.numDeletedDocs() + " max: " + open.maxDoc(), 1, open.numDocs()); + } + } + open.close(); + } catch (Exception e) { + fail(e.getMessage()); + } + + } + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java index d363edb777d..e23b0be1c91 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java @@ -29,7 +29,7 @@ public class TestSegmentInfo extends LuceneTestCase { public void testSizeInBytesCache() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); IndexWriter writer = new IndexWriter(dir, conf); writer.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java index 39d8386ec90..d161e130ccb 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -47,13 +47,13 @@ public class TestSegmentMerger extends LuceneTestCase { merge1Dir = newDirectory(); merge2Dir = newDirectory(); DocHelper.setupDoc(doc1); - SegmentInfo info1 = DocHelper.writeDoc(merge1Dir, doc1); + SegmentInfo info1 = DocHelper.writeDoc(random, merge1Dir, doc1); DocHelper.setupDoc(doc2); - SegmentInfo info2 = DocHelper.writeDoc(merge2Dir, doc2); + SegmentInfo info2 = DocHelper.writeDoc(random, merge2Dir, doc2); reader1 = SegmentReader.get(true, info1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); reader2 = SegmentReader.get(true, info2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } - + @Override public void tearDown() throws Exception { reader1.close(); @@ -71,8 +71,8 @@ public class TestSegmentMerger extends LuceneTestCase { assertTrue(reader1 != null); assertTrue(reader2 != null); } - - public void testMerge() throws IOException { + + public void testMerge() throws IOException { SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos()); merger.add(reader1); merger.add(reader2); @@ -83,7 +83,6 @@ public class TestSegmentMerger extends LuceneTestCase { SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos), BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Document newDoc1 = mergedReader.document(0); @@ -93,19 +92,19 @@ public class TestSegmentMerger extends LuceneTestCase { Document newDoc2 = mergedReader.document(1); assertTrue(newDoc2 != null); assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size()); - + DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader, MultiFields.getDeletedDocs(mergedReader), DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field")); assertTrue(termDocs != null); assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); - + Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); 
assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3); - + TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); BytesRef [] terms = vector.getTerms(); @@ -116,7 +115,7 @@ public class TestSegmentMerger extends LuceneTestCase { assertTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); assertTrue(vector instanceof TermPositionVector == true); - + for (int i = 0; i < terms.length; i++) { String term = terms[i].utf8ToString(); int freq = freqs[i]; @@ -127,5 +126,5 @@ public class TestSegmentMerger extends LuceneTestCase { TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); - } + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java index d284f11dfa0..7534e90f0b3 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -40,7 +40,7 @@ public class TestSegmentReader extends LuceneTestCase { super.setUp(); dir = newDirectory(); DocHelper.setupDoc(testDoc); - SegmentInfo info = DocHelper.writeDoc(dir, testDoc); + SegmentInfo info = DocHelper.writeDoc(random, dir, testDoc); reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } @@ -76,7 +76,7 @@ public class TestSegmentReader extends LuceneTestCase { public void testDelete() throws IOException { Document docToDelete = new Document(); DocHelper.setupDoc(docToDelete); - SegmentInfo info = DocHelper.writeDoc(dir, docToDelete); + SegmentInfo info = DocHelper.writeDoc(random, dir, docToDelete); SegmentReader deleteReader = SegmentReader.get(false, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); assertTrue(deleteReader != null); assertTrue(deleteReader.numDocs() == 1); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java b/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java index 11e7c33d0ee..96841d78d88 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java @@ -36,7 +36,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { super.setUp(); dir = newDirectory(); DocHelper.setupDoc(testDoc); - info = DocHelper.writeDoc(dir, testDoc); + info = DocHelper.writeDoc(random, dir, testDoc); } @Override @@ -105,7 +105,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { public void testSkipTo(int indexDivisor) throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Term ta = new Term("content","aaa"); for(int i = 0; i < 10; i++) @@ -251,7 +251,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { public void testIndexDivisor() throws IOException { testDoc = new Document(); DocHelper.setupDoc(testDoc); - DocHelper.writeDoc(dir, testDoc); + DocHelper.writeDoc(random, dir, testDoc); testTermDocs(2); testBadSeek(2); testSkipTo(2); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java b/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java index 
b29a2309204..4e9a44f57d0 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java @@ -48,7 +48,7 @@ public class TestSegmentTermEnum extends LuceneTestCase { public void testTermEnum() throws IOException { IndexWriter writer = null; - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); // ADD 100 documents with term : aaa // add 100 documents with terms: aaa bbb @@ -64,7 +64,7 @@ public class TestSegmentTermEnum extends LuceneTestCase { verifyDocFreq(); // merge segments by optimizing the index - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); writer.optimize(); writer.close(); @@ -74,7 +74,7 @@ public class TestSegmentTermEnum extends LuceneTestCase { public void testPrevTermAtEnd() throws IOException { - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); addDoc(writer, "aaa bbb"); writer.close(); SegmentReader reader = getOnlySegmentReader(IndexReader.open(dir, false)); diff --git a/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java index a4f138c40fa..6d325a79bd3 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java @@ -45,7 +45,7 @@ public class TestSnapshotDeletionPolicy extends LuceneTestCase { public static final String INDEX_PATH = "test.snapshots"; protected IndexWriterConfig getConfig(Random random, IndexDeletionPolicy dp) { - IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)); if (dp != null) { conf.setIndexDeletionPolicy(dp); } @@ -106,7 +106,7 @@ public class TestSnapshotDeletionPolicy extends LuceneTestCase { SnapshotDeletionPolicy dp = getDeletionPolicy(); final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(dp) + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(dp) .setMaxBufferedDocs(2)); writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestStressIndexing.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing.java index 00f5448f9e3..985af977eb0 100644 --- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing.java @@ -114,7 +114,7 @@ public class TestStressIndexing extends LuceneTestCase { */ public void runStressTest(Directory directory, MergeScheduler mergeScheduler) throws Exception { IndexWriter modifier = new IndexWriter(directory, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) 
.setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10).setMergeScheduler( mergeScheduler)); modifier.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java index 0995ead1834..a0fbe6d1f55 100644 --- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java +++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java @@ -148,7 +148,7 @@ public class TestStressIndexing2 extends LuceneTestCase { public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException { Map docs = new HashMap(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB( 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); w.commit(); @@ -199,44 +199,39 @@ public class TestStressIndexing2 extends LuceneTestCase { public Map indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, boolean doReaderPooling) throws IOException, InterruptedException { Map docs = new HashMap(); - for(int iter=0;iter<3;iter++) { - if (VERBOSE) { - System.out.println("TEST: iter=" + iter); - } - IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE) - .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) - .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); - w.setInfoStream(VERBOSE ? System.out : null); - LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); - lmp.setUseCompoundFile(false); - lmp.setMergeFactor(mergeFactor); + IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE) + .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)) + .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); + w.setInfoStream(VERBOSE ? 
System.out : null); + LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); + lmp.setUseCompoundFile(false); + lmp.setMergeFactor(mergeFactor); - threads = new IndexingThread[nThreads]; - for (int i=0; i docs, Directory dir) throws IOException { - IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); // index all docs in a single thread Iterator iter = docs.values().iterator(); diff --git a/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java b/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java index 5f1edbe6e66..d41c0793a1f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java @@ -39,7 +39,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testDoubleOffsetCounting() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(f); @@ -74,7 +74,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { // LUCENE-1442 public void testDoubleOffsetCounting2() throws Exception { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(f); @@ -96,7 +96,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { // LUCENE-1448 public void testEndOffsetPositionCharAnalyzer() throws Exception { Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(f); @@ -118,7 +118,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { // LUCENE-1448 public void testEndOffsetPositionWithCachingTokenFilter() throws Exception { Directory dir = newDirectory(); - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); @@ -143,7 +143,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testEndOffsetPositionStopFilter() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); + TEST_VERSION_CURRENT, new MockAnalyzer(random, 
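In TestStressIndexing2 the config call setMaxThreadStates(n) is replaced by setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(n)), following the DocumentsWriterPerThread refactoring this patch tracks. Below is a minimal configuration sketch, not a definitive API reference: it is declared in the org.apache.lucene.index package because, like the in-package test, it uses the thread-pool class without an import, and that package location is an assumption here.

package org.apache.lucene.index;

import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.util.Version;

public class ThreadPoolConfigSketch {
  public static IndexWriterConfig newConfig(int maxThreadStates) {
    // Old form (removed in this patch): .setMaxThreadStates(maxThreadStates)
    // New form: an explicit indexer thread pool bounded to the same count.
    return new IndexWriterConfig(Version.LUCENE_CURRENT, new MockAnalyzer(new Random()))
        .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates));
  }
}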
MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); Document doc = new Document(); Field f = newField("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(f); @@ -166,7 +166,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testEndOffsetPositionStandard() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "abcd the ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); @@ -197,7 +197,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testEndOffsetPositionStandardEmptyField() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); @@ -225,7 +225,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testEndOffsetPositionStandardEmptyField2() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); Field f = newField("field", "abcd", Field.Store.NO, @@ -259,7 +259,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { Directory dir = newDirectory(); for(int iter=0;iter<2;iter++) { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler( new SerialMergeScheduler()).setMergePolicy( @@ -292,7 +292,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { reader.close(); writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setMaxBufferedDocs(2) + new MockAnalyzer(random)).setMaxBufferedDocs(2) .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy( new LogDocMergePolicy())); @@ -310,7 +310,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { Directory dir = newDirectory(); for(int iter=0;iter<2;iter++) { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler( new SerialMergeScheduler()).setMergePolicy( @@ -347,7 +347,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testTermVectorCorruption3() throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) + TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler( new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy())); @@ -367,7 +367,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { writer.close(); writer = new 
IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setMaxBufferedDocs(2) + new MockAnalyzer(random)).setMaxBufferedDocs(2) .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy( new LogDocMergePolicy())); @@ -390,7 +390,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testNoTermVectorAfterTermVector() throws IOException { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document document = new Document(); document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); @@ -417,7 +417,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { public void testNoTermVectorAfterTermVectorMerge() throws IOException { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document document = new Document(); document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); diff --git a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java index 3516079d208..fb2e99a7461 100644 --- a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java +++ b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java @@ -32,7 +32,7 @@ import java.util.Random; public class TestThreadedOptimize extends LuceneTestCase { - private static final Analyzer ANALYZER = new MockAnalyzer(MockTokenizer.SIMPLE, true); + private static final Analyzer ANALYZER = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); private final static int NUM_THREADS = 3; //private final static int NUM_THREADS = 5; diff --git a/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java new file mode 100644 index 00000000000..8e285245293 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -0,0 +1,109 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestTieredMergePolicy extends LuceneTestCase { + + public void testExpungeDeletes() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + TieredMergePolicy tmp = newTieredMergePolicy(); + conf.setMergePolicy(tmp); + conf.setMaxBufferedDocs(4); + tmp.setMaxMergeAtOnce(100); + tmp.setSegmentsPerTier(100); + tmp.setExpungeDeletesPctAllowed(30.0); + IndexWriter w = new IndexWriter(dir, conf); + w.setInfoStream(VERBOSE ? System.out : null); + for(int i=0;i<80;i++) { + Document doc = new Document(); + doc.add(newField("content", "aaa " + (i%4), Field.Store.NO, Field.Index.ANALYZED)); + w.addDocument(doc); + } + assertEquals(80, w.maxDoc()); + assertEquals(80, w.numDocs()); + + if (VERBOSE) { + System.out.println("\nTEST: delete docs"); + } + w.deleteDocuments(new Term("content", "0")); + w.expungeDeletes(); + + assertEquals(80, w.maxDoc()); + assertEquals(60, w.numDocs()); + + if (VERBOSE) { + System.out.println("\nTEST: expunge2"); + } + tmp.setExpungeDeletesPctAllowed(10.0); + w.expungeDeletes(); + assertEquals(60, w.maxDoc()); + assertEquals(60, w.numDocs()); + w.close(); + dir.close(); + } + + public void testPartialOptimize() throws Exception { + for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) { + if (VERBOSE) { + System.out.println("TEST: iter=" + iter); + } + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + conf.setMergeScheduler(new SerialMergeScheduler()); + TieredMergePolicy tmp = newTieredMergePolicy(); + conf.setMergePolicy(tmp); + conf.setMaxBufferedDocs(2); + tmp.setMaxMergeAtOnce(3); + tmp.setSegmentsPerTier(6); + + IndexWriter w = new IndexWriter(dir, conf); + w.setInfoStream(VERBOSE ? 
System.out : null); + int maxCount = 0; + final int numDocs = _TestUtil.nextInt(random, 20, 100); + for(int i=0;i data = new HashMap(); data.put("index", "Rolled back to 1-"+id); @@ -127,7 +127,7 @@ public class TestTransactionRollback extends LuceneTestCase { dir = newDirectory(); //Build index, of records 1 to 100, committing after each batch of 10 IndexDeletionPolicy sdp=new KeepAllDeletionPolicy(); - IndexWriter w=new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(sdp)); + IndexWriter w=new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(sdp)); for(int currentRecordId=1;currentRecordId<=100;currentRecordId++) { Document doc=new Document(); doc.add(newField(FIELD_RECORD_ID,""+currentRecordId,Field.Store.YES,Field.Index.ANALYZED)); @@ -201,7 +201,7 @@ public class TestTransactionRollback extends LuceneTestCase { for(int i=0;i<2;i++) { // Unless you specify a prior commit point, rollback // should not work: - new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) + new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(new DeleteLastCommitPolicy())).close(); IndexReader r = IndexReader.open(dir, true); assertEquals(100, r.numDocs()); diff --git a/lucene/src/test/org/apache/lucene/index/TestTransactions.java b/lucene/src/test/org/apache/lucene/index/TestTransactions.java index 467bed726c9..fd7312017a7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTransactions.java +++ b/lucene/src/test/org/apache/lucene/index/TestTransactions.java @@ -93,7 +93,7 @@ public class TestTransactions extends LuceneTestCase { IndexWriter writer1 = new IndexWriter( dir1, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(3). setMergeScheduler(new ConcurrentMergeScheduler()). setMergePolicy(newLogMergePolicy(2)) @@ -104,7 +104,7 @@ public class TestTransactions extends LuceneTestCase { // happen @ different times IndexWriter writer2 = new IndexWriter( dir2, - newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). setMergeScheduler(new ConcurrentMergeScheduler()). 
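The newly added TestTieredMergePolicy (full file in the hunk above) tunes three knobs, maxMergeAtOnce, segmentsPerTier, and expungeDeletesPctAllowed, then checks that expungeDeletes() only rewrites segments whose deleted-doc percentage exceeds the threshold. A compressed standalone version of that flow, as a sketch only and using the same setters the test calls:

import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class TieredExpungeSketch {
  public static void main(String[] args) throws Exception {
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setMaxMergeAtOnce(100);
    tmp.setSegmentsPerTier(100);
    tmp.setExpungeDeletesPctAllowed(30.0); // segments with fewer deletes are left alone

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new MockAnalyzer(new Random())).setMaxBufferedDocs(4).setMergePolicy(tmp);
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, conf);
    for (int i = 0; i < 80; i++) {
      Document doc = new Document();
      doc.add(new Field("content", "aaa " + (i % 4), Field.Store.NO, Field.Index.ANALYZED));
      w.addDocument(doc);
    }
    w.deleteDocuments(new Term("content", "0")); // deletes roughly 1 in 4 docs
    w.expungeDeletes();                          // 25% < 30%, so deletes may remain
    tmp.setExpungeDeletesPctAllowed(10.0);
    w.expungeDeletes();                          // now the deleted docs are merged away
    System.out.println("maxDoc=" + w.maxDoc() + " numDocs=" + w.numDocs());
    w.close();
    dir.close();
  }
}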
setMergePolicy(newLogMergePolicy(3)) @@ -189,7 +189,7 @@ public class TestTransactions extends LuceneTestCase { } public void initIndex(Directory dir) throws Throwable { - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for(int j=0; j<7; j++) { Document d = new Document(); int n = random.nextInt(); diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java b/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java index 97c5908d509..8fa688a1cbd 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java +++ b/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java @@ -275,7 +275,7 @@ public class TestSurrogates extends LuceneTestCase { RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( TEST_VERSION_CURRENT, - new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec()))); + new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec()))); final int numField = _TestUtil.nextInt(random, 2, 5); diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index fe784a53b47..9ca0e86d35d 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -26,23 +26,21 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.AbstractField; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.MultiPerDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider; import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; @@ -111,6 +109,15 @@ public class TestDocValuesIndexing extends LuceneTestCase { writer.addDocument(doc); } writer.commit(); + for (int i = 0; i < 5; i++) { + Document doc = new Document(); + DocValuesField valuesField = new DocValuesField("docId1"); + valuesField.setFloat(i); + doc.add(valuesField); + doc.add(new Field("docId1", "" + i, Store.NO, Index.ANALYZED)); + writer.addDocument(doc); + } + writer.commit(); writer.optimize(true); writer.close(true); @@ -120,11 +127,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "docId", - new 
MockAnalyzer()); + new MockAnalyzer(random)); TopDocs search = searcher.search(parser.parse("0 OR 1 OR 2 OR 3 OR 4"), 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; - DocValues docValues = MultiFields.getDocValues(reader, "docId"); + DocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId"); Source source = docValues.getSource(); for (int i = 0; i < scoreDocs.length; i++) { assertEquals(i, scoreDocs[i].doc); @@ -249,19 +256,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { private IndexWriterConfig writerConfig(boolean useCompoundFile) { final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()); + new MockAnalyzer(random)); cfg.setMergePolicy(newLogMergePolicy(random)); - MergePolicy mergePolicy = cfg.getMergePolicy(); - if (mergePolicy instanceof LogMergePolicy) { - LogMergePolicy policy = ((LogMergePolicy) mergePolicy); - policy.setUseCompoundFile(useCompoundFile); - policy.setRequireContiguousMerge(true); - } else if (useCompoundFile) { - LogMergePolicy policy = new LogDocMergePolicy(); - policy.setUseCompoundFile(useCompoundFile); - policy.setRequireContiguousMerge(true); - cfg.setMergePolicy(policy); - } + LogMergePolicy policy = new LogDocMergePolicy(); + cfg.setMergePolicy(policy); + policy.setUseCompoundFile(useCompoundFile); cfg.setCodecProvider(provider); return cfg; } @@ -471,17 +470,15 @@ public class TestDocValuesIndexing extends LuceneTestCase { private DocValues getDocValues(IndexReader reader, String field) throws IOException { boolean optimized = reader.isOptimized(); - Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() - : MultiFields.getFields(reader); + PerDocValues perDoc = optimized ? reader.getSequentialSubReaders()[0].perDocValues() + : MultiPerDocValues.getPerDocs(reader); switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized case 0: - return fields.docValues(field); + return perDoc.docValues(field); case 1: - FieldsEnum iterator = fields.iterator(); - String name; - while ((name = iterator.next()) != null) { - if (name.equals(field)) - return iterator.docValues(); + DocValues docValues = perDoc.docValues(field); + if (docValues != null) { + return docValues; } throw new RuntimeException("no such field " + field); case 2:// this only works if we are on an optimized index! 
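TestDocValuesIndexing moves from the Fields/MultiFields view to the new per-document values API: values are now resolved via MultiPerDocValues.getPerDocs(reader).docValues(name) and read through a DocValues.Source. The helper below is a sketch of that lookup only, built from the classes the test now imports; the field argument is a placeholder, and the null return for a field without doc values mirrors the null check shown in the hunk.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiPerDocValues;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.DocValues.Source;

public class PerDocValuesLookupSketch {
  // Resolves a field's doc values across all segments of the reader,
  // mirroring the MultiPerDocValues path the test now takes.
  static Source openSource(IndexReader reader, String field) throws IOException {
    PerDocValues perDoc = MultiPerDocValues.getPerDocs(reader);
    DocValues docValues = perDoc.docValues(field);
    if (docValues == null) {
      throw new IOException("no doc values indexed for field " + field);
    }
    return docValues.getSource(); // random-access view over the per-document values
  }
}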
diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java index c6a3a3829af..4611aca906b 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java @@ -69,7 +69,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testSimple() throws Exception { String[] fields = {"b", "t"}; - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer()); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random)); Query q = mfqp.parse("one"); assertEquals("b:one t:one", q.toString()); @@ -132,7 +132,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { boosts.put("b", Float.valueOf(5)); boosts.put("t", Float.valueOf(10)); String[] fields = {"b", "t"}; - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(), boosts); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random), boosts); //Check for simple @@ -158,24 +158,24 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testStaticMethod1() throws ParseException { String[] fields = {"b", "t"}; String[] queries = {"one", "two"}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, new MockAnalyzer()); + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, new MockAnalyzer(random)); assertEquals("b:one t:two", q.toString()); String[] queries2 = {"+one", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries2, fields, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries2, fields, new MockAnalyzer(random)); assertEquals("(+b:one) (+t:two)", q.toString()); String[] queries3 = {"one", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries3, fields, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries3, fields, new MockAnalyzer(random)); assertEquals("b:one (+t:two)", q.toString()); String[] queries4 = {"one +more", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries4, fields, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries4, fields, new MockAnalyzer(random)); assertEquals("(b:one +b:more) (+t:two)", q.toString()); String[] queries5 = {"blah"}; try { - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries5, fields, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries5, fields, new MockAnalyzer(random)); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -197,15 +197,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testStaticMethod2() throws ParseException { String[] fields = {"b", "t"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer()); + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random)); assertEquals("+b:one -t:one", q.toString()); - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer()); + q = 
MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random)); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random)); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -217,15 +217,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { //int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer());//, fields, flags, new MockAnalyzer()); + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random));//, fields, flags, new MockAnalyzer(random)); assertEquals("+b:one -t:one", q.toString()); - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random)); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random)); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -237,12 +237,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { String[] fields = {"f1", "f2", "f3"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer()); + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random)); assertEquals("+f1:one -f2:two f3:three", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random)); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -253,12 +253,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { String[] queries = {"one", "two"}; String[] fields = {"b", "t"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer()); + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random)); assertEquals("+b:one -t:two", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer()); + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random)); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -280,7 +280,7 @@ public 
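The TestMultiFieldQueryParser hunks repeat the MockAnalyzer(random) substitution across the static parse(...) overloads that pair a query string, a field array, and BooleanClause.Occur flags. As a quick reference, here is a sketch of the flag-based overload the test exercises (illustrative only; the surrounding class is hypothetical and the expected expansion follows the test's assertion):

import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class MultiFieldParseSketch {
  public static void main(String[] args) throws Exception {
    String[] fields = {"b", "t"};
    // MUST on the first field, MUST_NOT on the second, applied to one query string.
    BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one", fields, flags,
        new MockAnalyzer(new Random()));
    System.out.println(q); // "+b:one -t:one", the same expansion the test asserts
  }
}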
class TestMultiFieldQueryParser extends LuceneTestCase { } public void testStopWordSearching() throws Exception { - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new MockAnalyzer(random); Directory ramDir = newDirectory(); IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); @@ -303,7 +303,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { * Return empty tokens for field "f1". */ private static class AnalyzerReturningNull extends Analyzer { - MockAnalyzer stdAnalyzer = new MockAnalyzer(); + MockAnalyzer stdAnalyzer = new MockAnalyzer(random); public AnalyzerReturningNull() { } diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java index f8567ef7578..ce0c00fc003 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -148,7 +148,7 @@ public class TestQueryParser extends LuceneTestCase { public QueryParser getParser(Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a); qp.setDefaultOperator(QueryParser.OR_OPERATOR); return qp; @@ -218,7 +218,7 @@ public class TestQueryParser extends LuceneTestCase { public Query getQueryDOA(String query, Analyzer a) throws Exception { if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a); qp.setDefaultOperator(QueryParser.AND_OPERATOR); return qp.parse(query); @@ -339,8 +339,8 @@ public class TestQueryParser extends LuceneTestCase { public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new MockAnalyzer(), "türm term term"); - assertQueryEquals("ümlaut", new MockAnalyzer(), "ümlaut"); + assertQueryEquals("türm term term", new MockAnalyzer(random), "türm term term"); + assertQueryEquals("ümlaut", new MockAnalyzer(random), "ümlaut"); // FIXME: enhance MockAnalyzer to be able to support this // it must no longer extend CharTokenizer @@ -400,7 +400,7 @@ public class TestQueryParser extends LuceneTestCase { assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, "+(title:dog title:cat) -author:\"bob dole\""); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); // make sure OR is the default: assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator()); qp.setDefaultOperator(QueryParser.AND_OPERATOR); @@ -410,7 +410,7 @@ public class TestQueryParser extends LuceneTestCase { } public void testPunct() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -430,7 +430,7 @@ public class TestQueryParser extends LuceneTestCase { assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true); + Analyzer a = new 
MockAnalyzer(random, MockTokenizer.WHITESPACE, true); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -558,7 +558,7 @@ public class TestQueryParser extends LuceneTestCase { assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod()); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod()); @@ -618,7 +618,7 @@ public class TestQueryParser extends LuceneTestCase { final String defaultField = "default"; final String monthField = "month"; final String hourField = "hour"; - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); // set a field specific date resolution qp.setDateResolution(monthField, DateTools.Resolution.MONTH); @@ -651,7 +651,7 @@ public class TestQueryParser extends LuceneTestCase { } public void testEscaped() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); /*assertQueryEquals("\\[brackets", a, "\\[brackets"); assertQueryEquals("\\[brackets", null, "brackets"); @@ -745,7 +745,7 @@ public class TestQueryParser extends LuceneTestCase { } public void testQueryStringEscaping() throws Exception { - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); @@ -831,7 +831,7 @@ public class TestQueryParser extends LuceneTestCase { public void testBoost() throws Exception { CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on")); - Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true); + Analyzer oneStopAnalyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", oneStopAnalyzer); Query q = qp.parse("on^1.0"); assertNotNull(q); @@ -844,7 +844,7 @@ public class TestQueryParser extends LuceneTestCase { q = qp.parse("\"on\"^1.0"); assertNotNull(q); - QueryParser qp2 = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); + QueryParser qp2 = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); q = qp2.parse("the^3"); // "the" is a stop word so the result is an empty query: assertNotNull(q); @@ -873,7 +873,7 @@ public class TestQueryParser extends LuceneTestCase { public void testCustomQueryParserWildcard() { try { - new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t"); + new QPTestParser("contents", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("a?t"); fail("Wildcard queries should not be allowed"); } catch 
(ParseException expected) { // expected exception @@ -882,7 +882,7 @@ public class TestQueryParser extends LuceneTestCase { public void testCustomQueryParserFuzzy() throws Exception { try { - new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~"); + new QPTestParser("contents", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("xunit~"); fail("Fuzzy queries should not be allowed"); } catch (ParseException expected) { // expected exception @@ -892,7 +892,7 @@ public class TestQueryParser extends LuceneTestCase { public void testBooleanQuery() throws Exception { BooleanQuery.setMaxClauseCount(2); try { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); qp.parse("one two three"); fail("ParseException expected due to too many boolean clauses"); } catch (ParseException expected) { @@ -904,7 +904,7 @@ public class TestQueryParser extends LuceneTestCase { * This test differs from TestPrecedenceQueryParser */ public void testPrecedence() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); Query query1 = qp.parse("A AND B OR C AND D"); Query query2 = qp.parse("+A +B +C +D"); assertEquals(query1, query2); @@ -913,7 +913,7 @@ public class TestQueryParser extends LuceneTestCase { // Todo: convert this from DateField to DateUtil // public void testLocalDateFormat() throws IOException, ParseException { // Directory ramDir = newDirectory(); -// IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); +// IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); // addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); // addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); // iw.close(); @@ -940,7 +940,7 @@ public class TestQueryParser extends LuceneTestCase { public void testStarParsing() throws Exception { final int[] type = new int[1]; - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)) { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) { @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { // override error checking of superclass @@ -999,13 +999,13 @@ public class TestQueryParser extends LuceneTestCase { } public void testEscapedWildcard() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r")); assertEquals(q, qp.parse("foo\\?ba?r")); } public void testRegexps() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); RegexpQuery q = new 
RegexpQuery(new Term("field", "[a-z][123]")); assertEquals(q, qp.parse("/[a-z][123]/")); qp.setLowercaseExpandedTerms(true); @@ -1033,7 +1033,7 @@ public class TestQueryParser extends LuceneTestCase { public void testStopwords() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton()); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopSet, true)); Query result = qp.parse("a:the OR a:foo"); assertNotNull("result is null and it shouldn't be", result); assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); @@ -1049,7 +1049,7 @@ public class TestQueryParser extends LuceneTestCase { } public void testPositionIncrement() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); qp.setEnablePositionIncrements(true); String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; // 0 2 5 7 8 @@ -1066,7 +1066,7 @@ public class TestQueryParser extends LuceneTestCase { } public void testMatchAllDocs() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); assertEquals(new MatchAllDocsQuery(), qp.parse("*:*")); assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)")); BooleanQuery bq = (BooleanQuery)qp.parse("+*:* -*:*"); @@ -1075,7 +1075,7 @@ public class TestQueryParser extends LuceneTestCase { } private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "date", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "date", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); qp.setLocale(Locale.ENGLISH); Query q = qp.parse(query); ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs; @@ -1093,7 +1093,7 @@ public class TestQueryParser extends LuceneTestCase { // "match" public void testPositionIncrements() throws Exception { Directory dir = newDirectory(); - Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, a)); Document doc = new Document(); doc.add(newField("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java index 6d21b13185c..f8c0490c59c 100644 --- a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java @@ -123,7 +123,7 @@ public class BaseTestRangeFilter extends LuceneTestCase { doc.add(bodyField); RandomIndexWriter writer = new RandomIndexWriter(random, index.index, - newIndexWriterConfig(random, 
TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(writer.w); diff --git a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java index 0602cab1f62..16a6cd6b258 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java @@ -54,7 +54,7 @@ public class TestBoolean2 extends LuceneTestCase { @BeforeClass public static void beforeClass() throws Exception { directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); @@ -79,7 +79,7 @@ public class TestBoolean2 extends LuceneTestCase { } while(docCount < 3000); RandomIndexWriter w = new RandomIndexWriter(random, dir2, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); doc.add(newField("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED)); @@ -118,7 +118,7 @@ public class TestBoolean2 extends LuceneTestCase { }; public Query makeQuery(String queryText) throws ParseException { - Query q = (new QueryParser(TEST_VERSION_CURRENT, field, new MockAnalyzer())).parse(queryText); + Query q = (new QueryParser(TEST_VERSION_CURRENT, field, new MockAnalyzer(random))).parse(queryText); return q; } diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java index 50b95728c99..14ba6850f8a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -143,7 +143,7 @@ public class TestBooleanQuery extends LuceneTestCase { IndexReader reader2 = iw2.getReader(); iw2.close(); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); MultiReader multireader = new MultiReader(reader1, reader2); diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java index 5d8aa209107..bb4e8be6ff3 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java @@ -38,7 +38,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter( random, dir, - newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergeScheduler(new SerialMergeScheduler()). 
// asserts below requires no unexpected merges: setMergePolicy(newLogMergePolicy(10)) @@ -49,7 +49,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { // but we use .reopen on this reader below and expect to // (must) get an NRT reader: IndexReader reader = IndexReader.open(writer.w, true); - IndexSearcher searcher = newSearcher(reader); + // same reason we don't wrap? + IndexSearcher searcher = newSearcher(reader, false); // add a doc, refresh the reader, and check that its there Document doc = new Document(); @@ -58,7 +59,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); assertEquals("Should find a hit...", 1, docs.totalHits); @@ -81,7 +82,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -96,7 +97,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); @@ -115,7 +116,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { reader = refreshReader(reader); assertTrue(reader != oldReader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); int missCount = filter.missCount; docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -126,7 +127,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java index 6c7f7af1165..5a0099883dc 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java @@ -160,7 +160,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter( random, dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergeScheduler(new SerialMergeScheduler()). // asserts below requires no unexpected merges: setMergePolicy(newLogMergePolicy(10)) @@ -171,7 +171,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { // but we use .reopen on this reader below and expect to // (must) get an NRT reader: IndexReader reader = IndexReader.open(writer.w, true); - IndexSearcher searcher = newSearcher(reader); + // same reason we don't wrap? 
+ IndexSearcher searcher = newSearcher(reader, false); // add a doc, refresh the reader, and check that its there Document doc = new Document(); @@ -180,7 +181,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); assertEquals("Should find a hit...", 1, docs.totalHits); @@ -202,7 +203,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -218,7 +219,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); @@ -238,7 +239,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); assertTrue(reader != oldReader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); int missCount = filter.missCount; docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -249,7 +250,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); missCount = filter.missCount; docs = searcher.search(new MatchAllDocsQuery(), filter, 1); @@ -265,7 +266,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); @@ -278,7 +279,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { reader = refreshReader(reader); searcher.close(); - searcher = newSearcher(reader); + searcher = newSearcher(reader, false); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); diff --git a/lucene/src/test/org/apache/lucene/search/TestDateSort.java b/lucene/src/test/org/apache/lucene/search/TestDateSort.java index 5a4c1b3a1bf..45e0d8aa116 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDateSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestDateSort.java @@ -81,7 +81,7 @@ public class TestDateSort extends LuceneTestCase { Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true)); - QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, new MockAnalyzer()); + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, new MockAnalyzer(random)); Query query = queryParser.parse("Document"); // Execute the search and process the search results. 
diff --git a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index 0f1cb4d94dd..272384b0e01 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -89,8 +89,8 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { index = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, index, - newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setSimilarityProvider(sim).setMergePolicy(newInOrderLogMergePolicy())); + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setSimilarityProvider(sim).setMergePolicy(newLogMergePolicy())); // hed is the most important field, dek is secondary diff --git a/lucene/src/test/org/apache/lucene/search/TestDocBoost.java b/lucene/src/test/org/apache/lucene/search/TestDocBoost.java index 8521724a0c5..11d1505b240 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocBoost.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocBoost.java @@ -37,7 +37,7 @@ public class TestDocBoost extends LuceneTestCase { public void testDocBoost() throws Exception { Directory store = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Fieldable f1 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); Fieldable f2 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java index cab656c48dc..896220025e8 100644 --- a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java +++ b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -39,7 +39,7 @@ public class TestElevationComparator extends LuceneTestCase { Directory directory = newDirectory(); IndexWriter writer = new IndexWriter( directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMaxBufferedDocs(2). 
setMergePolicy(newLogMergePolicy(1000)) ); diff --git a/lucene/src/test/org/apache/lucene/search/TestExplanations.java b/lucene/src/test/org/apache/lucene/search/TestExplanations.java index 467c9477484..34a9cd2a49c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestExplanations.java @@ -57,7 +57,7 @@ public class TestExplanations extends LuceneTestCase { // same contents, but no field boost public static final String ALTFIELD = "alt"; public static final QueryParser qp = - new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer()); + new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer(random)); @Override public void tearDown() throws Exception { @@ -71,7 +71,7 @@ public class TestExplanations extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java index 1bca291c661..a56cc366d35 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java @@ -41,7 +41,7 @@ public class TestFieldCache extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); long theLong = Long.MAX_VALUE; double theDouble = Double.MAX_VALUE; byte theByte = Byte.MAX_VALUE; @@ -213,7 +213,7 @@ public class TestFieldCache extends LuceneTestCase { public void testEmptyIndex() throws Exception { Directory dir = newDirectory(); - IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(500)); + IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(500)); IndexReader r = IndexReader.open(writer, true); FieldCache.DocTerms terms = FieldCache.DEFAULT.getTerms(r, "foobar"); FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(r, "foobar"); diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java index db56940e110..d33f5a388e4 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java @@ -531,7 +531,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { @Test public void testSparseIndex() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( 
TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int d = -20; d <= 20; d++) { Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java index 43c328e72c5..3062720a992 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -51,7 +51,7 @@ public class TestFilteredQuery extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter (random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter (random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add (newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java index bada9039631..a036d0f1c1a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java @@ -46,14 +46,14 @@ public class TestFilteredSearch extends LuceneTestCase { Directory directory = newDirectory(); int[] filterBits = {1, 36}; SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); searchFiltered(writer, directory, filter, enforceSingleSegment); // run the test on more than one segment enforceSingleSegment = false; // reset - it is stateful filter.reset(); writer.close(); - writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10).setMergePolicy(newInOrderLogMergePolicy())); + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10).setMergePolicy(newLogMergePolicy())); // we index 60 docs - this will create 6 segments searchFiltered(writer, directory, filter, enforceSingleSegment); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java index c478a59f0d8..a3d5453af78 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java @@ -386,7 +386,7 @@ public class TestFuzzyQuery extends LuceneTestCase { public void testGiga() throws Exception { - MockAnalyzer analyzer = new MockAnalyzer(); + MockAnalyzer analyzer = new MockAnalyzer(random); Directory index = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, index); @@ -423,7 +423,7 @@ public class TestFuzzyQuery extends LuceneTestCase { } public void testDistanceAsEditsParsing() throws Exception { - QueryParser qp = new 
QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); FuzzyQuery q = (FuzzyQuery) qp.parse("foobar~2"); assertEquals(2f, q.getMinSimilarity(), 0.0001f); } @@ -437,7 +437,7 @@ public class TestFuzzyQuery extends LuceneTestCase { IndexReader reader = w.getReader(); IndexSearcher searcher = newSearcher(reader); w.close(); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); FuzzyQuery q = (FuzzyQuery) qp.parse("fouba~2"); ScoreDoc[] hits = searcher.search(q, 10).scoreDocs; diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java index 262386a43b6..0d3bc5aa9b7 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java @@ -79,7 +79,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase { int terms = (int) Math.pow(2, bits); Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java index 8d96c0feb2e..1fff44306e2 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java @@ -35,12 +35,12 @@ import org.apache.lucene.util.LuceneTestCase; * */ public class TestMatchAllDocsQuery extends LuceneTestCase { - private Analyzer analyzer = new MockAnalyzer(); + private Analyzer analyzer = new MockAnalyzer(random); public void testQuery() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(2).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); addDoc("one", iw, 1f); addDoc("two", iw, 20f); addDoc("three four", iw, 300f); @@ -54,9 +54,9 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { hits = is.search(new MatchAllDocsQuery(), null, 1000).scoreDocs; assertEquals(3, hits.length); - assertEquals("one", ir.document(hits[0].doc).get("key")); - assertEquals("two", ir.document(hits[1].doc).get("key")); - assertEquals("three four", ir.document(hits[2].doc).get("key")); + assertEquals("one", is.doc(hits[0].doc).get("key")); + assertEquals("two", is.doc(hits[1].doc).get("key")); + assertEquals("three four", is.doc(hits[2].doc).get("key")); // assert with norms scoring turned on @@ -64,19 +64,19 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { hits = is.search(normsQuery, null, 1000).scoreDocs; assertEquals(3, hits.length); - assertEquals("three four", ir.document(hits[0].doc).get("key")); - assertEquals("two", ir.document(hits[1].doc).get("key")); - assertEquals("one", 
ir.document(hits[2].doc).get("key")); + assertEquals("three four", is.doc(hits[0].doc).get("key")); + assertEquals("two", is.doc(hits[1].doc).get("key")); + assertEquals("one", is.doc(hits[2].doc).get("key")); // change norm & retest - ir.setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f)); + is.getIndexReader().setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f)); normsQuery = new MatchAllDocsQuery("key"); hits = is.search(normsQuery, null, 1000).scoreDocs; assertEquals(3, hits.length); - assertEquals("one", ir.document(hits[0].doc).get("key")); - assertEquals("three four", ir.document(hits[1].doc).get("key")); - assertEquals("two", ir.document(hits[2].doc).get("key")); + assertEquals("one", is.doc(hits[0].doc).get("key")); + assertEquals("three four", is.doc(hits[1].doc).get("key")); + assertEquals("two", is.doc(hits[2].doc).get("key")); // some artificial queries to trigger the use of skipTo(): @@ -93,7 +93,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { assertEquals(1, hits.length); // delete a document: - ir.deleteDocument(0); + is.getIndexReader().deleteDocument(0); hits = is.search(new MatchAllDocsQuery(), null, 1000).scoreDocs; assertEquals(2, hits.length); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java index da75d4e2a83..59b05e17439 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -62,7 +62,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { small = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, small, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMergePolicy(newInOrderLogMergePolicy())); + new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < data.length; i++) { Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java index a1fe8e6ed6c..b5ff4f3346b 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java @@ -47,9 +47,9 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase { dir = newDirectory(); sdir1 = newDirectory(); sdir2 = newDirectory(); - final RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer()); - final RandomIndexWriter swriter1 = new RandomIndexWriter(random, sdir1, new MockAnalyzer()); - final RandomIndexWriter swriter2 = new RandomIndexWriter(random, sdir2, new MockAnalyzer()); + final RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random)); + final RandomIndexWriter swriter1 = new RandomIndexWriter(random, sdir1, new MockAnalyzer(random)); + final RandomIndexWriter swriter2 = new RandomIndexWriter(random, sdir2, new MockAnalyzer(random)); for (int i = 0; i < 10; i++) { Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java index 7657d25dc78..6f2807de8be 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java +++ 
b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java @@ -38,7 +38,7 @@ public class TestMultiThreadTermVectors extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); //writer.setUseCompoundFile(false); //writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java index ed2699a4c94..fcad856e416 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java @@ -41,7 +41,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase { public void testMultiValuedNRQ() throws Exception { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US)); diff --git a/lucene/src/test/org/apache/lucene/search/TestNot.java b/lucene/src/test/org/apache/lucene/search/TestNot.java index d86f13f8a31..2e9e445294e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNot.java +++ b/lucene/src/test/org/apache/lucene/search/TestNot.java @@ -45,7 +45,7 @@ public class TestNot extends LuceneTestCase { IndexReader reader = writer.getReader(); IndexSearcher searcher = newSearcher(reader); - QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); Query query = parser.parse("a NOT b"); //System.out.println(query); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java index c6342a0b7de..476a50f76ca 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -54,9 +54,9 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { public static void beforeClass() throws Exception { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)) - .setMergePolicy(newInOrderLogMergePolicy())); + .setMergePolicy(newLogMergePolicy())); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), @@ -154,7 +154,9 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { assertEquals("First doc"+type, 2*distance+startOffset, Integer.parseInt(doc.get(field)) ); doc=searcher.doc(sd[sd.length-1].doc); assertEquals("Last doc"+type, (1+count)*distance+startOffset, 
Integer.parseInt(doc.get(field)) ); - if (i>0 && searcher.getIndexReader().getSequentialSubReaders().length == 1) { + if (i>0 && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { assertEquals("Distinct term number is equal for all query types", lastTerms, terms); } lastTerms = terms; @@ -284,7 +286,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { @Test public void testInfiniteValues() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(new NumericField("float").setFloatValue(Float.NEGATIVE_INFINITY)); doc.add(new NumericField("int").setIntValue(Integer.MIN_VALUE)); @@ -378,7 +380,9 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { termCountT += tq.getTotalNumberOfTerms(); termCountC += cq.getTotalNumberOfTerms(); } - if (precisionStep == Integer.MAX_VALUE && searcher.getIndexReader().getSequentialSubReaders().length == 1) { + if (precisionStep == Integer.MAX_VALUE && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC); } else if (VERBOSE) { System.out.println("Average number of terms during random search on '" + field + "':"); diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java index d5591778133..171dbabc760 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -51,9 +51,9 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { public static void beforeClass() throws Exception { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)) - .setMergePolicy(newInOrderLogMergePolicy())); + .setMergePolicy(newLogMergePolicy())); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), @@ -154,7 +154,9 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { assertEquals("First doc"+type, 2*distance+startOffset, Long.parseLong(doc.get(field)) ); doc=searcher.doc(sd[sd.length-1].doc); assertEquals("Last doc"+type, (1+count)*distance+startOffset, Long.parseLong(doc.get(field)) ); - if (i>0 && searcher.getIndexReader().getSequentialSubReaders().length == 1) { + if (i>0 && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { assertEquals("Distinct term number is equal for all query types", lastTerms, terms); } lastTerms = terms; @@ -301,7 +303,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { public void testInfiniteValues() throws Exception { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(new 
NumericField("double").setDoubleValue(Double.NEGATIVE_INFINITY)); doc.add(new NumericField("long").setLongValue(Long.MIN_VALUE)); @@ -395,7 +397,9 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { termCountT += tq.getTotalNumberOfTerms(); termCountC += cq.getTotalNumberOfTerms(); } - if (precisionStep == Integer.MAX_VALUE && searcher.getIndexReader().getSequentialSubReaders().length == 1) { + if (precisionStep == Integer.MAX_VALUE && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC); } else if (VERBOSE) { System.out.println("Average number of terms during random search on '" + field + "':"); diff --git a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java index 35349c696ab..74b8a1b08f9 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -212,7 +212,7 @@ public class TestPhraseQuery extends LuceneTestCase { public void testPhraseQueryWithStopAnalyzer() throws Exception { Directory directory = newDirectory(); - Analyzer stopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false); + Analyzer stopAnalyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig( Version.LUCENE_40, stopAnalyzer)); Document doc = new Document(); @@ -285,7 +285,7 @@ public class TestPhraseQuery extends LuceneTestCase { reader.close(); writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); doc = new Document(); doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); @@ -335,7 +335,7 @@ public class TestPhraseQuery extends LuceneTestCase { public void testSlopScoring() throws IOException { Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED)); @@ -374,7 +374,7 @@ public class TestPhraseQuery extends LuceneTestCase { } public void testToString() throws Exception { - Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); + Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer); qp.setEnablePositionIncrements(true); PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\""); @@ -596,9 +596,9 @@ public class TestPhraseQuery extends LuceneTestCase { public void testRandomPhrases() throws Exception { Directory dir = newDirectory(); - Analyzer analyzer = new MockAnalyzer(); + Analyzer analyzer = new 
MockAnalyzer(random); - RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy())); List> docs = new ArrayList>(); Document d = new Document(); Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java index 3007d8a102e..eecf803c9f4 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -197,7 +197,7 @@ public class TestPositionIncrement extends LuceneTestCase { // should not find "1 2" because there is a gap of 1 in the index QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", - new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, false, false)); + new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopStopList, false)); q = (PhraseQuery) qp.parse("\"1 2\""); hits = searcher.search(q, null, 1000).scoreDocs; assertEquals(0, hits.length); @@ -221,7 +221,7 @@ public class TestPositionIncrement extends LuceneTestCase { // when both qp qnd stopFilter propagate increments, we should find the doc. qp = new QueryParser(TEST_VERSION_CURRENT, "field", - new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, true, false)); + new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopStopList, true)); qp.setEnablePositionIncrements(true); q = (PhraseQuery) qp.parse("\"1 stop 2\""); hits = searcher.search(q, null, 1000).scoreDocs; diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java index 46c1a75aab2..9b15e1c05b6 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java @@ -49,7 +49,7 @@ public class TestPrefixRandom extends LuceneTestCase { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java b/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java index db3752b7d55..488b886b999 100644 --- a/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java +++ b/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java @@ -39,7 +39,7 @@ public class TestQueryTermVector extends LuceneTestCase { result = new QueryTermVector(null); assertTrue(result.getTerms().length == 0); - result = new QueryTermVector("foo bar foo again foo bar go go go", new MockAnalyzer()); + result = new QueryTermVector("foo bar foo again foo bar go go go", new MockAnalyzer(random)); terms = result.getTerms(); assertTrue(terms.length == 4); freq = result.getTermFrequencies(); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java index 7180d3b82ec..826d8845033 100644 --- 
a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java @@ -47,7 +47,7 @@ public class TestRegexpRandom extends LuceneTestCase { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java index 143d977cfb7..f18a0f0dea0 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java @@ -49,7 +49,8 @@ import org.apache.lucene.util.automaton.RegExp; * Generates random regexps, and validates against a simple impl. */ public class TestRegexpRandom2 extends LuceneTestCase { - protected IndexSearcher searcher; + protected IndexSearcher searcher1; + protected IndexSearcher searcher2; private IndexReader reader; private Directory dir; @@ -58,7 +59,7 @@ public class TestRegexpRandom2 extends LuceneTestCase { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)) + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); Field field = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED); @@ -82,14 +83,16 @@ public class TestRegexpRandom2 extends LuceneTestCase { } reader = writer.getReader(); - searcher = newSearcher(reader); + searcher1 = newSearcher(reader); + searcher2 = newSearcher(reader); writer.close(); } @Override public void tearDown() throws Exception { reader.close(); - searcher.close(); + searcher1.close(); + searcher2.close(); dir.close(); super.tearDown(); } @@ -157,12 +160,12 @@ public class TestRegexpRandom2 extends LuceneTestCase { // automatically comparable. // TODO: does this check even matter anymore?! - Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field"); + Terms terms = MultiFields.getTerms(searcher1.getIndexReader(), "field"); if (!(smart.getTermsEnum(terms) instanceof AutomatonTermsEnum)) return; - TopDocs smartDocs = searcher.search(smart, 25); - TopDocs dumbDocs = searcher.search(dumb, 25); + TopDocs smartDocs = searcher1.search(smart, 25); + TopDocs dumbDocs = searcher2.search(dumb, 25); CheckHits.checkEqual(smart, smartDocs.scoreDocs, dumbDocs.scoreDocs); } diff --git a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java index 1fc436f8b3b..c8270e33d1a 100755 --- a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -44,7 +44,7 @@ public class TestScorerPerf extends LuceneTestCase { // Create a dummy index with nothing in it. // This could possibly fail if Lucene starts checking for docid ranges... 
d = newDirectory();
-    IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
+    IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     iw.addDocument(new Document());
     iw.close();
     s = new IndexSearcher(d, true);
@@ -59,7 +59,7 @@ public class TestScorerPerf extends LuceneTestCase {
       terms[i] = new Term("f",Character.toString((char)('A'+i)));
     }
 
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
for (int i=0; i 1);
+    assertTrue("reader=" + r, r.getSequentialSubReaders().length > 1);
     ValueSource s1 = new IntFieldSource("field");
     AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext());
diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
index 2b8ad636789..0b8353a5255 100644
--- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
@@ -113,7 +113,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
     directory = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
                                                      newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
-                                                     .setSimilarityProvider(similarityProvider).setMergePolicy(newInOrderLogMergePolicy()));
+                                                     .setSimilarityProvider(similarityProvider).setMergePolicy(newLogMergePolicy()));
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {
       Document doc = new Document();
diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
index b04f96ae333..49edd5d2e12 100644
--- a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
@@ -18,13 +18,19 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -64,12 +70,53 @@ public class TestBasics extends LuceneTestCase {
   private static IndexReader reader;
   private static Directory directory;
 
+  static final class SimplePayloadFilter extends TokenFilter {
+    String fieldName;
+    int pos;
+    final PayloadAttribute payloadAttr;
+    final CharTermAttribute termAttr;
+
+    public SimplePayloadFilter(TokenStream input, String fieldName) {
+      super(input);
+      this.fieldName = fieldName;
+      pos = 0;
+      payloadAttr = input.addAttribute(PayloadAttribute.class);
+      termAttr = input.addAttribute(CharTermAttribute.class);
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (input.incrementToken()) {
+        payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
+        pos++;
+        return true;
+      } else {
+        return false;
+      }
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      pos = 0;
+    }
+  }
+
+  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName);
+    }
+
+  };
+
   @BeforeClass
   public static void beforeClass() throws Exception {
     directory = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, true))
-        .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newInOrderLogMergePolicy()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, simplePayloadAnalyzer)
+        .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy()));
     //writer.infoStream = System.out;
     for (int i = 0; i < 2000; i++) {
       Document doc = new Document();
diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
index f86aea3b4b2..652dfaa9ea6 100644
--- a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
@@ -55,7 +55,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
 
   public void setUp() throws Exception {
     super.setUp();
     directory = newDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+    RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
     writer.addDocument(doc(new Field[] { field("id", "0")
                                          ,
diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
index 8316ff8d858..39102344d1f 100644
--- a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
+++ b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
@@ -43,7 +43,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
   public static final String FIELD = "field";
   public static final QueryParser qp =
-    new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer());
+    new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer(random));
 
   @Override
   public void tearDown() throws Exception {
@@ -57,7 +57,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
   public void setUp() throws Exception {
     super.setUp();
     directory = newDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+    RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
       doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java
b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java index 583da5191ab..aa02222909d 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java @@ -37,7 +37,7 @@ public class TestSpanFirstQuery extends LuceneTestCase { // mimic StopAnalyzer CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton()); - Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); + Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopSet, true); RandomIndexWriter writer = new RandomIndexWriter(random, dir, analyzer); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java index fc7e0db69d2..04a54a440f5 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -55,7 +55,7 @@ public class TestSpans extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED)); @@ -481,7 +481,7 @@ public class TestSpans extends LuceneTestCase { public void testNPESpanQuery() throws Throwable { final Directory dir = newDirectory(); final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer(random))); // Add documents addDoc(writer, "1", "the big dogs went running to the market"); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java index e3e2e6774e0..7eea843b070 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java @@ -58,8 +58,8 @@ public class TestSpansAdvanced extends LuceneTestCase { mDirectory = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(MockTokenizer.SIMPLE, true, - MockTokenFilter.ENGLISH_STOPSET, true)).setMergePolicy(newInOrderLogMergePolicy())); + new MockAnalyzer(random, MockTokenizer.SIMPLE, true, + MockTokenFilter.ENGLISH_STOPSET, true)).setMergePolicy(newLogMergePolicy())); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java index 6406bddf50c..288b062976e 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java @@ -46,9 +46,9 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced { // create test index 
final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)) - .setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); + .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. Should it?"); addDocument(writer, "C", "It shouldn't."); diff --git a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java index 4a4c2780c80..cab6a66b266 100755 --- a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java +++ b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -240,12 +240,12 @@ public class TestBufferedIndexInput extends LuceneTestCase { } public void testSetBufferSize() throws IOException { - File indexDir = new File(TEMP_DIR, "testSetBufferSize"); + File indexDir = _TestUtil.getTempDir("testSetBufferSize"); MockFSDirectory dir = new MockFSDirectory(indexDir, random); try { IndexWriter writer = new IndexWriter( dir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setOpenMode(OpenMode.CREATE). setMergePolicy(newLogMergePolicy(false)) ); diff --git a/lucene/src/test/org/apache/lucene/store/TestDirectory.java b/lucene/src/test/org/apache/lucene/store/TestDirectory.java index a2aab82a689..03e6f150574 100644 --- a/lucene/src/test/org/apache/lucene/store/TestDirectory.java +++ b/lucene/src/test/org/apache/lucene/store/TestDirectory.java @@ -42,7 +42,7 @@ public class TestDirectory extends LuceneTestCase { // Test that different instances of FSDirectory can coexist on the same // path, can read, write, and lock files. 
public void testDirectInstantiation() throws Exception { - File path = new File(TEMP_DIR, "testDirectInstantiation"); + File path = _TestUtil.getTempDir("testDirectInstantiation"); int sz = 3; Directory[] dirs = new Directory[sz]; @@ -134,7 +134,7 @@ public class TestDirectory extends LuceneTestCase { // LUCENE-1468 public void testFSDirectoryFilter() throws IOException { - checkDirectoryFilter(newFSDirectory(new File(TEMP_DIR,"test"))); + checkDirectoryFilter(newFSDirectory(_TestUtil.getTempDir("test"))); } // LUCENE-1468 @@ -151,7 +151,7 @@ public class TestDirectory extends LuceneTestCase { // LUCENE-1468 public void testCopySubdir() throws Throwable { - File path = new File(TEMP_DIR, "testsubdir"); + File path = _TestUtil.getTempDir("testsubdir"); try { path.mkdirs(); new File(path, "subdir").mkdirs(); @@ -164,7 +164,7 @@ public class TestDirectory extends LuceneTestCase { // LUCENE-1468 public void testNotDirectory() throws Throwable { - File path = new File(TEMP_DIR, "testnotdir"); + File path = _TestUtil.getTempDir("testnotdir"); Directory fsDir = new SimpleFSDirectory(path, null); try { IndexOutput out = fsDir.createOutput("afile"); diff --git a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java index 635970fd85c..75090b17072 100644 --- a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java +++ b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java @@ -47,7 +47,7 @@ public class TestFileSwitchDirectory extends LuceneTestCase { FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true); IndexWriter writer = new IndexWriter( fsd, - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). setMergePolicy(newLogMergePolicy(false)) ); TestIndexWriterReader.createIndexNoClose(true, "ram", writer); diff --git a/lucene/src/test/org/apache/lucene/store/TestLockFactory.java b/lucene/src/test/org/apache/lucene/store/TestLockFactory.java index 8a4ca30478a..bddbccced0a 100755 --- a/lucene/src/test/org/apache/lucene/store/TestLockFactory.java +++ b/lucene/src/test/org/apache/lucene/store/TestLockFactory.java @@ -49,7 +49,7 @@ public class TestLockFactory extends LuceneTestCase { // Lock prefix should have been set: assertTrue("lock prefix was not set by the RAMDirectory", lf.lockPrefixSet); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); // add 100 documents (so that commit lock is used) for (int i = 0; i < 100; i++) { @@ -81,13 +81,13 @@ public class TestLockFactory extends LuceneTestCase { assertTrue("RAMDirectory.setLockFactory did not take", NoLockFactory.class.isInstance(dir.getLockFactory())); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); // required so the second open succeed // Create a 2nd IndexWriter. 
This is normally not allowed but it should run through since we're not // using any locks: IndexWriter writer2 = null; try { - writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); } catch (Exception e) { e.printStackTrace(System.out); fail("Should not have hit an IOException with no locking"); @@ -107,12 +107,12 @@ public class TestLockFactory extends LuceneTestCase { assertTrue("RAMDirectory did not use correct LockFactory: got " + dir.getLockFactory(), SingleInstanceLockFactory.class.isInstance(dir.getLockFactory())); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); // Create a 2nd IndexWriter. This should fail: IndexWriter writer2 = null; try { - writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); fail("Should have hit an IOException with two IndexWriters on default SingleInstanceLockFactory"); } catch (IOException e) { } @@ -148,7 +148,7 @@ public class TestLockFactory extends LuceneTestCase { Directory dir = newFSDirectory(indexDir, lockFactory); // First create a 1 doc index: - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); addDoc(w); w.close(); @@ -280,7 +280,7 @@ public class TestLockFactory extends LuceneTestCase { IndexWriter writer = null; for(int i=0;i arc = fst.getFirstArc(new FST.Arc()); s.verifyStateAndBelow(fst, arc, 1); } + + // Make sure raw FST can differentiate between final vs + // non-final end nodes + public void testNonFinalStopNodes() throws Exception { + final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); + final Long nothing = outputs.getNoOutput(); + final Builder b = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs); + + final FST fst = new FST(FST.INPUT_TYPE.BYTE1, outputs); + + final Builder.UnCompiledNode rootNode = new Builder.UnCompiledNode(b, 0); + + // Add final stop node + { + final Builder.UnCompiledNode node = new Builder.UnCompiledNode(b, 0); + node.isFinal = true; + rootNode.addArc('a', node); + final Builder.CompiledNode frozen = new Builder.CompiledNode(); + frozen.address = fst.addNode(node); + rootNode.arcs[0].nextFinalOutput = outputs.get(17); + rootNode.arcs[0].isFinal = true; + rootNode.arcs[0].output = nothing; + rootNode.arcs[0].target = frozen; + } + + // Add non-final stop node + { + final Builder.UnCompiledNode node = new Builder.UnCompiledNode(b, 0); + rootNode.addArc('b', node); + final Builder.CompiledNode frozen = new Builder.CompiledNode(); + frozen.address = fst.addNode(node); + rootNode.arcs[1].nextFinalOutput = nothing; + rootNode.arcs[1].output = outputs.get(42); + rootNode.arcs[1].target = frozen; + } + + fst.finish(fst.addNode(rootNode)); + + checkStopNodes(fst, outputs); + + // Make sure it still works after save/load: + Directory dir = newDirectory(); + IndexOutput out = dir.createOutput("fst"); + 
fst.save(out); + out.close(); + + IndexInput in = dir.openInput("fst"); + final FST fst2 = new FST(in, outputs); + checkStopNodes(fst2, outputs); + in.close(); + dir.close(); + } + + private void checkStopNodes(FST fst, PositiveIntOutputs outputs) throws Exception { + final Long nothing = outputs.getNoOutput(); + FST.Arc startArc = fst.getFirstArc(new FST.Arc()); + assertEquals(nothing, startArc.output); + assertEquals(nothing, startArc.nextFinalOutput); + + FST.Arc arc = fst.readFirstTargetArc(startArc, new FST.Arc()); + assertEquals('a', arc.label); + assertEquals(17, arc.nextFinalOutput.longValue()); + assertTrue(arc.isFinal()); + + arc = fst.readNextArc(arc); + assertEquals('b', arc.label); + assertFalse(arc.isFinal()); + assertEquals(42, arc.output.longValue()); + } } diff --git a/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java b/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java index 9b29168b0a3..36d5b0393c5 100644 --- a/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java +++ b/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java @@ -33,7 +33,8 @@ public enum LicenseType { MPL("Mozilla Public License", false), //NOT SURE on the required notice PD("Public Domain", false), //SUNBCLA("Sun Binary Code License Agreement"), - SUN("Sun Open Source License", false) + SUN("Sun Open Source License", false), + FAKE("FAKE license - not needed", false) ; private String display; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java index c20b94ee152..86e118f0663 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java @@ -132,7 +132,8 @@ public class GermanStemmer strip( buffer ); } // Additional step for irregular plural nouns like "Matrizen -> Matrix". - if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) { + // NOTE: this length constraint is probably not a great value, its just to prevent AIOOBE on empty terms + if ( buffer.length() > 0 && buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) { buffer.setCharAt( buffer.length() - 1, 'x' ); } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java new file mode 100644 index 00000000000..3b6237d3c0c --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java @@ -0,0 +1,129 @@ +package org.apache.lucene.analysis.lv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.analysis.util.WordlistLoader; +import org.apache.lucene.util.Version; + +/** + * {@link Analyzer} for Latvian. + */ +public final class LatvianAnalyzer extends StopwordAnalyzerBase { + private final Set stemExclusionSet; + + /** File containing default Latvian stopwords. */ + public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; + + /** + * Returns an unmodifiable instance of the default stop words set. + * @return default stop words set. + */ + public static Set getDefaultStopSet(){ + return DefaultSetHolder.DEFAULT_STOP_SET; + } + + /** + * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class + * accesses the static final set the first time.; + */ + private static class DefaultSetHolder { + static final Set DEFAULT_STOP_SET; + + static { + try { + DEFAULT_STOP_SET = WordlistLoader.getWordSet(LatvianAnalyzer.class, + DEFAULT_STOPWORD_FILE); + } catch (IOException ex) { + // default set should always be present as it is part of the + // distribution (JAR) + throw new RuntimeException("Unable to load default stopword set"); + } + } + } + + /** + * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + */ + public LatvianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + */ + public LatvianAnalyzer(Version matchVersion, Set stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + } + + /** + * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is + * provided this analyzer will add a {@link KeywordMarkerFilter} before + * stemming. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + * @param stemExclusionSet a set of terms not to be stemmed + */ + public LatvianAnalyzer(Version matchVersion, Set stopwords, Set stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); + } + + /** + * Creates a + * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents} + * which tokenizes all the text in the provided {@link Reader}. + * + * @return A + * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents} + * built from an {@link StandardTokenizer} filtered with + * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} + * , {@link KeywordMarkerFilter} if a stem exclusion set is + * provided and {@link LatvianStemFilter}. 
+ */ + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + final Tokenizer source = new StandardTokenizer(matchVersion, reader); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); + if(!stemExclusionSet.isEmpty()) + result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new LatvianStemFilter(result); + return new TokenStreamComponents(source, result); + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java new file mode 100644 index 00000000000..b39b39e236e --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.lv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link LatvianStemmer} to stem Latvian + * words. + *

+ * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *

+ */ +public final class LatvianStemFilter extends TokenFilter { + private final LatvianStemmer stemmer = new LatvianStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public LatvianStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java new file mode 100644 index 00000000000..7d59fc0ae97 --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java @@ -0,0 +1,174 @@ +package org.apache.lucene.analysis.lv; + +import static org.apache.lucene.analysis.util.StemmerUtil.*; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Light stemmer for Latvian. + *

+ * This is a light version of the algorithm in Karlis Kreslin's PhD thesis + * A stemming algorithm for Latvian with the following modifications: + *

    + *
+ *   • Only explicitly stems noun and adjective morphology
+ *   • Stricter length/vowel checks for the resulting stems (verb etc suffix stripping is removed)
+ *   • Removes only the primary inflectional suffixes: case and number for nouns;
+ *     case, number, gender, and definitiveness for adjectives.
+ *   • Palatalization is only handled when a declension II,V,VI noun suffix is removed.
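+ * (Editorial worked example, not part of the original patch: tracing stem() and
+ * unpalatalize() below, "ceļiem" has three vowels and ends with the palatalizing
+ * suffix "iem" (vowel count 2), so the suffix is stripped to "ceļ" and the trailing
+ * "ļ" is then unpalatalized to "l", giving the stem "cel".)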
+ */ +public class LatvianStemmer { + /** + * Stem a latvian word. returns the new adjusted length. + */ + public int stem(char s[], int len) { + int numVowels = numVowels(s, len); + + for (int i = 0; i < affixes.length; i++) { + Affix affix = affixes[i]; + if (numVowels > affix.vc && len >= affix.affix.length + 3 && endsWith(s, len, affix.affix)) { + len -= affix.affix.length; + return affix.palatalizes ? unpalatalize(s, len) : len; + } + } + + return len; + } + + static final Affix affixes[] = { + new Affix("ajiem", 3, false), new Affix("ajai", 3, false), + new Affix("ajam", 2, false), new Affix("ajām", 2, false), + new Affix("ajos", 2, false), new Affix("ajās", 2, false), + new Affix("iem", 2, true), new Affix("ajā", 2, false), + new Affix("ais", 2, false), new Affix("ai", 2, false), + new Affix("ei", 2, false), new Affix("ām", 1, false), + new Affix("am", 1, false), new Affix("ēm", 1, false), + new Affix("īm", 1, false), new Affix("im", 1, false), + new Affix("um", 1, false), new Affix("us", 1, true), + new Affix("as", 1, false), new Affix("ās", 1, false), + new Affix("es", 1, false), new Affix("os", 1, true), + new Affix("ij", 1, false), new Affix("īs", 1, false), + new Affix("ēs", 1, false), new Affix("is", 1, false), + new Affix("ie", 1, false), new Affix("u", 1, true), + new Affix("a", 1, true), new Affix("i", 1, true), + new Affix("e", 1, false), new Affix("ā", 1, false), + new Affix("ē", 1, false), new Affix("ī", 1, false), + new Affix("ū", 1, false), new Affix("o", 1, false), + new Affix("s", 0, false), new Affix("š", 0, false), + }; + + static class Affix { + char affix[]; // suffix + int vc; // vowel count of the suffix + boolean palatalizes; // true if we should fire palatalization rules. + + Affix(String affix, int vc, boolean palatalizes) { + this.affix = affix.toCharArray(); + this.vc = vc; + this.palatalizes = palatalizes; + } + } + + /** + * Most cases are handled except for the ambiguous ones: + *
    + *
+ *   • s -> š
+ *   • t -> š
+ *   • d -> ž
+ *   • z -> ž
+ */ + private int unpalatalize(char s[], int len) { + // we check the character removed: if its -u then + // its 2,5, or 6 gen pl., and these two can only apply then. + if (s[len] == 'u') { + // kš -> kst + if (endsWith(s, len, "kš")) { + len++; + s[len-2] = 's'; + s[len-1] = 't'; + return len; + } + // ņņ -> nn + if (endsWith(s, len, "ņņ")) { + s[len-2] = 'n'; + s[len-1] = 'n'; + return len; + } + } + + // otherwise all other rules + if (endsWith(s, len, "pj") || endsWith(s, len, "bj") + || endsWith(s, len, "mj") || endsWith(s, len, "vj")) { + // labial consonant + return len-1; + } else if (endsWith(s, len, "šņ")) { + s[len-2] = 's'; + s[len-1] = 'n'; + return len; + } else if (endsWith(s, len, "žņ")) { + s[len-2] = 'z'; + s[len-1] = 'n'; + return len; + } else if (endsWith(s, len, "šļ")) { + s[len-2] = 's'; + s[len-1] = 'l'; + return len; + } else if (endsWith(s, len, "žļ")) { + s[len-2] = 'z'; + s[len-1] = 'l'; + return len; + } else if (endsWith(s, len, "ļņ")) { + s[len-2] = 'l'; + s[len-1] = 'n'; + return len; + } else if (endsWith(s, len, "ļļ")) { + s[len-2] = 'l'; + s[len-1] = 'l'; + return len; + } else if (s[len-1] == 'č') { + s[len-1] = 'c'; + return len; + } else if (s[len-1] == 'ļ') { + s[len-1] = 'l'; + return len; + } else if (s[len-1] == 'ņ') { + s[len-1] = 'n'; + return len; + } + + return len; + } + + /** + * Count the vowels in the string, we always require at least + * one in the remaining stem to accept it. + */ + private int numVowels(char s[], int len) { + int n = 0; + for (int i = 0; i < len; i++) { + switch(s[i]) { + case 'a': case 'e': case 'i': + case 'o': case 'u': case 'ā': + case 'ī': case 'ē': case 'ū': + n++; + } + } + return n; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/package.html b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/package.html new file mode 100644 index 00000000000..add7df6d00f --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/package.html @@ -0,0 +1,22 @@ + + + + +Analyzer for Latvian. 
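Editorial note, not part of the patch: the new Latvian chain added above wires StandardTokenizer, StandardFilter, LowerCaseFilter, StopFilter, an optional KeywordMarkerFilter, and LatvianStemFilter (see createComponents in LatvianAnalyzer). A minimal, hypothetical usage sketch follows; the demo class, matchVersion constant, field name, and sample text are illustrative placeholders, not from the patch:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.lv.LatvianAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class LatvianAnalyzerDemo {
      public static void main(String[] args) throws Exception {
        // Version.LUCENE_31 is only an example matchVersion; the tests above use TEST_VERSION_CURRENT.
        LatvianAnalyzer analyzer = new LatvianAnalyzer(Version.LUCENE_31);
        TokenStream ts = analyzer.tokenStream("body", new StringReader("ceļiem un ceļš"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // prints the lowercased, stopword-filtered, stemmed tokens
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }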
+ + diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java index 8f0935c11e7..5f3b7c79988 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java @@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter { public void reset() throws IOException { super.reset(); hasMoreTokensInClone = false; + clonedToken = null; + clonedTermAtt = null; + clonedOffsetAtt = null; } } diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/lv/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/lv/stopwords.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/lv/stopwords.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java index b417bd59bc3..d365cba19e5 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java @@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java index a1eccaa6d94..2832b1697d8 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java +++ 
b/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java @@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java index b21e35f0823..80f6ab1fe3a 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java @@ -157,4 +157,8 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { checkOneTermReuse(a, input, expected); } + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } \ No newline at end of file diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java index e54f0a30709..307194b27d9 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java @@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "llengües", "llengües"); checkOneTermReuse(a, "llengua", "llengu"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java index 18c208eb80c..b48cf635459 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java @@ -270,4 +270,9 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase { newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java index 6f3b862c61b..813c1195745 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java @@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "\ud801\udc1ctest" }); } + + /** 
blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } final class PayloadSetter extends TokenFilter { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java index 4cd9f9f3ed2..f6deee59b50 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java @@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { dir.close(); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java index 5dc33fd3482..0ee6e4c69f2 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java @@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { assertEquals(0, offsetAtt.startOffset()); assertEquals(4, offsetAtt.endOffset()); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index 53b61c7c639..17bae9e24e5 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.ReusableAnalyzerBase; @@ -219,4 +220,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { new String[] { "仮", "名", "遣", "い", "カタカナ" }, new String[] { "", "", "", "", "" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index e33af62fc0a..3078208de11 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ 
b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { new String[] { "仮", "名", "遣", "い", "カタカナ" }, new String[] { "", "", "", "", "" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java index f9c72663a36..d728bc852f4 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java @@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase { CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"}); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java index cf38a1786e8..e7863b0503b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java @@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "undersøgelse", "undersøgelse"); checkOneTermReuse(a, "undersøg", "undersøg"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java index b329298a774..c3bc23f3483 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java @@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "Schaltflächen", "schaltflach"); checkOneTermReuse(a, "Schaltflaechen", "schaltflaech"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java index 63dfdb6c4cc..94cbb5fac9d 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java @@ -45,4 +45,9 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, 
getDataFile("delighttestdata.zip"), "delight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java index c14c7ea4076..984a563eca7 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java @@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java index d7602aa47c9..510a5adc327 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java @@ -36,20 +36,30 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*; * */ public class TestGermanStemFilter extends BaseTokenStreamTestCase { + Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer t = new KeywordTokenizer(reader); + return new TokenStreamComponents(t, + new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); + } + }; - public void testStemming() throws Exception { - Analyzer analyzer = new ReusableAnalyzerBase() { - @Override - protected TokenStreamComponents createComponents(String fieldName, - Reader reader) { - Tokenizer t = new KeywordTokenizer(reader); - return new TokenStreamComponents(t, - new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); - } - }; - + public void testStemming() throws Exception { InputStream vocOut = getClass().getResourceAsStream("data.txt"); assertVocabulary(analyzer, vocOut); vocOut.close(); } + + // LUCENE-3043: we use keywordtokenizer in this test, + // so ensure the stemmer does not crash on zero-length strings. 
+ public void testEmpty() throws Exception { + assertAnalyzesTo(analyzer, "", new String[] { "" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java index d8a1f69765a..88e924e9143 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java @@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase { assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" }); } - } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java index ee1a6da09f4..b10ae03f268 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java @@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "books", "books"); checkOneTermReuse(a, "book", "book"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java index 8ff0303b47d..ebc65cb3e88 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java @@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase { checkOneTerm(analyzer, "congress", "congress"); checkOneTerm(analyzer, "serious", "serious"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java index e34829a27bd..3d5880748aa 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java @@ -36,21 +36,21 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*; /** * Test the PorterStemFilter with Martin Porter's test data. 
*/ -public class TestPorterStemFilter extends BaseTokenStreamTestCase { +public class TestPorterStemFilter extends BaseTokenStreamTestCase { + Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer t = new KeywordTokenizer(reader); + return new TokenStreamComponents(t, new PorterStemFilter(t)); + } + }; + /** * Run the stemmer against all strings in voc.txt * The output should be the same as the string in output.txt */ public void testPorterStemFilter() throws Exception { - Analyzer a = new ReusableAnalyzerBase() { - @Override - protected TokenStreamComponents createComponents(String fieldName, - Reader reader) { - Tokenizer t = new KeywordTokenizer(reader); - return new TokenStreamComponents(t, new PorterStemFilter(t)); - } - }; - assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt"); } @@ -61,4 +61,9 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase { TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java index 687573cd027..a41c8efca6c 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java @@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "chicana", "chican"); checkOneTermReuse(a, "chicano", "chicano"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java index f494bd65725..f8dd991d19a 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java @@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java index 591a09be504..5c11deb02e9 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java @@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "zaldiak", "zaldiak"); checkOneTermReuse(a, 
"mendiari", "mendi"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java index d38d0f64b64..e4ef8942f59 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java @@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java index 379b0257575..35b67a278aa 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java @@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); checkOneTermReuse(a, "edeltäjistään", "edeltäjistään"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java index d946a20ca53..4924a4070c4 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java @@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java index 83664627475..2cfb6bfb112 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java @@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31); assertAnalyzesTo(a, "Votre", new String[] { }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git 
a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java index ffe8d6c22cc..4cd9f79ced2 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java @@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java index b45c5323c82..5830788954b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java @@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java index b67bf087713..0264427c444 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java @@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "correspondente", "correspondente"); checkOneTermReuse(a, "corresponderá", "correspond"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java index dcebfc34400..343a52b8fdd 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java @@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase { HindiAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTermReuse(a, "हिंदी", "हिंदी"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java index e00289f33bd..b2ada3be0d6 100644 --- 
a/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java @@ -50,4 +50,9 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "babakocsi", "babakocsi"); checkOneTermReuse(a, "babakocsijáért", "babakocs"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java index 03c068b9aa7..68caf5c2c39 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java @@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "արծիվներ", "արծիվներ"); checkOneTermReuse(a, "արծիվ", "արծ"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java index 28877a700e9..3002e62bb99 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java @@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "peledakan", "peledakan"); checkOneTermReuse(a, "pembunuhan", "bunuh"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java index 3348721298a..ae4bf2f2d24 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java @@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "abbandonata", "abbandonata"); checkOneTermReuse(a, "abbandonati", "abbandon"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java index b850630086c..6fbcd1be08d 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java @@ -45,4 +45,9 @@ public class TestItalianLightStemFilter extends 
BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java new file mode 100644 index 00000000000..724584582c4 --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.lv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; + +public class TestLatvianAnalyzer extends BaseTokenStreamTestCase { + /** This test fails with NPE when the + * stopwords file is missing in classpath */ + public void testResourcesAvailable() { + new LatvianAnalyzer(TEST_VERSION_CURRENT); + } + + /** test stopwords and stemming */ + public void testBasics() throws IOException { + Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT); + // stemming + checkOneTermReuse(a, "tirgiem", "tirg"); + checkOneTermReuse(a, "tirgus", "tirg"); + // stopword + assertAnalyzesTo(a, "un", new String[] {}); + } + + /** test use of exclusion set */ + public void testExclude() throws IOException { + Set exclusionSet = new HashSet(); + exclusionSet.add("tirgiem"); + Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT, + LatvianAnalyzer.getDefaultStopSet(), exclusionSet); + checkOneTermReuse(a, "tirgiem", "tirgiem"); + checkOneTermReuse(a, "tirgus", "tirg"); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new LatvianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java new file mode 100644 index 00000000000..4a32236d1c6 --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java @@ -0,0 +1,272 @@ +package org.apache.lucene.analysis.lv; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Basic tests for {@link LatvianStemmer} + */ +public class TestLatvianStemmer extends BaseTokenStreamTestCase { + private Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer)); + } + }; + + public void testNouns1() throws IOException { + // decl. I + checkOneTerm(a, "tēvs", "tēv"); // nom. sing. + checkOneTerm(a, "tēvi", "tēv"); // nom. pl. + checkOneTerm(a, "tēva", "tēv"); // gen. sing. + checkOneTerm(a, "tēvu", "tēv"); // gen. pl. + checkOneTerm(a, "tēvam", "tēv"); // dat. sing. + checkOneTerm(a, "tēviem", "tēv"); // dat. pl. + checkOneTerm(a, "tēvu", "tēv"); // acc. sing. + checkOneTerm(a, "tēvus", "tēv"); // acc. pl. + checkOneTerm(a, "tēvā", "tēv"); // loc. sing. + checkOneTerm(a, "tēvos", "tēv"); // loc. pl. + checkOneTerm(a, "tēvs", "tēv"); // voc. sing. + checkOneTerm(a, "tēvi", "tēv"); // voc. pl. + } + + /** + * decl II nouns with (s,t) -> š and (d,z) -> ž + * palatalization will generally conflate to two stems + * due to the ambiguity (plural and singular). + */ + public void testNouns2() throws IOException { + // decl. II + + // c -> č palatalization + checkOneTerm(a, "lācis", "lāc"); // nom. sing. + checkOneTerm(a, "lāči", "lāc"); // nom. pl. + checkOneTerm(a, "lāča", "lāc"); // gen. sing. + checkOneTerm(a, "lāču", "lāc"); // gen. pl. + checkOneTerm(a, "lācim", "lāc"); // dat. sing. + checkOneTerm(a, "lāčiem", "lāc"); // dat. pl. + checkOneTerm(a, "lāci", "lāc"); // acc. sing. + checkOneTerm(a, "lāčus", "lāc"); // acc. pl. + checkOneTerm(a, "lācī", "lāc"); // loc. sing. + checkOneTerm(a, "lāčos", "lāc"); // loc. pl. + checkOneTerm(a, "lāci", "lāc"); // voc. sing. + checkOneTerm(a, "lāči", "lāc"); // voc. pl. + + // n -> ņ palatalization + checkOneTerm(a, "akmens", "akmen"); // nom. sing. + checkOneTerm(a, "akmeņi", "akmen"); // nom. pl. + checkOneTerm(a, "akmens", "akmen"); // gen. sing. + checkOneTerm(a, "akmeņu", "akmen"); // gen. pl. + checkOneTerm(a, "akmenim", "akmen"); // dat. sing. + checkOneTerm(a, "akmeņiem", "akmen"); // dat. pl. + checkOneTerm(a, "akmeni", "akmen"); // acc. sing. + checkOneTerm(a, "akmeņus", "akmen"); // acc. pl. + checkOneTerm(a, "akmenī", "akmen"); // loc. sing. + checkOneTerm(a, "akmeņos", "akmen"); // loc. pl. + checkOneTerm(a, "akmens", "akmen"); // voc. sing. + checkOneTerm(a, "akmeņi", "akmen"); // voc. pl. + + // no palatalization + checkOneTerm(a, "kurmis", "kurm"); // nom. sing. 
+ checkOneTerm(a, "kurmji", "kurm"); // nom. pl. + checkOneTerm(a, "kurmja", "kurm"); // gen. sing. + checkOneTerm(a, "kurmju", "kurm"); // gen. pl. + checkOneTerm(a, "kurmim", "kurm"); // dat. sing. + checkOneTerm(a, "kurmjiem", "kurm"); // dat. pl. + checkOneTerm(a, "kurmi", "kurm"); // acc. sing. + checkOneTerm(a, "kurmjus", "kurm"); // acc. pl. + checkOneTerm(a, "kurmī", "kurm"); // loc. sing. + checkOneTerm(a, "kurmjos", "kurm"); // loc. pl. + checkOneTerm(a, "kurmi", "kurm"); // voc. sing. + checkOneTerm(a, "kurmji", "kurm"); // voc. pl. + } + + public void testNouns3() throws IOException { + // decl III + checkOneTerm(a, "lietus", "liet"); // nom. sing. + checkOneTerm(a, "lieti", "liet"); // nom. pl. + checkOneTerm(a, "lietus", "liet"); // gen. sing. + checkOneTerm(a, "lietu", "liet"); // gen. pl. + checkOneTerm(a, "lietum", "liet"); // dat. sing. + checkOneTerm(a, "lietiem", "liet"); // dat. pl. + checkOneTerm(a, "lietu", "liet"); // acc. sing. + checkOneTerm(a, "lietus", "liet"); // acc. pl. + checkOneTerm(a, "lietū", "liet"); // loc. sing. + checkOneTerm(a, "lietos", "liet"); // loc. pl. + checkOneTerm(a, "lietus", "liet"); // voc. sing. + checkOneTerm(a, "lieti", "liet"); // voc. pl. + } + + public void testNouns4() throws IOException { + // decl IV + checkOneTerm(a, "lapa", "lap"); // nom. sing. + checkOneTerm(a, "lapas", "lap"); // nom. pl. + checkOneTerm(a, "lapas", "lap"); // gen. sing. + checkOneTerm(a, "lapu", "lap"); // gen. pl. + checkOneTerm(a, "lapai", "lap"); // dat. sing. + checkOneTerm(a, "lapām", "lap"); // dat. pl. + checkOneTerm(a, "lapu", "lap"); // acc. sing. + checkOneTerm(a, "lapas", "lap"); // acc. pl. + checkOneTerm(a, "lapā", "lap"); // loc. sing. + checkOneTerm(a, "lapās", "lap"); // loc. pl. + checkOneTerm(a, "lapa", "lap"); // voc. sing. + checkOneTerm(a, "lapas", "lap"); // voc. pl. + + checkOneTerm(a, "puika", "puik"); // nom. sing. + checkOneTerm(a, "puikas", "puik"); // nom. pl. + checkOneTerm(a, "puikas", "puik"); // gen. sing. + checkOneTerm(a, "puiku", "puik"); // gen. pl. + checkOneTerm(a, "puikam", "puik"); // dat. sing. + checkOneTerm(a, "puikām", "puik"); // dat. pl. + checkOneTerm(a, "puiku", "puik"); // acc. sing. + checkOneTerm(a, "puikas", "puik"); // acc. pl. + checkOneTerm(a, "puikā", "puik"); // loc. sing. + checkOneTerm(a, "puikās", "puik"); // loc. pl. + checkOneTerm(a, "puika", "puik"); // voc. sing. + checkOneTerm(a, "puikas", "puik"); // voc. pl. + } + + /** + * Genitive plural forms with (s,t) -> š and (d,z) -> ž + * will not conflate due to ambiguity. + */ + public void testNouns5() throws IOException { + // decl V + // l -> ļ palatalization + checkOneTerm(a, "egle", "egl"); // nom. sing. + checkOneTerm(a, "egles", "egl"); // nom. pl. + checkOneTerm(a, "egles", "egl"); // gen. sing. + checkOneTerm(a, "egļu", "egl"); // gen. pl. + checkOneTerm(a, "eglei", "egl"); // dat. sing. + checkOneTerm(a, "eglēm", "egl"); // dat. pl. + checkOneTerm(a, "egli", "egl"); // acc. sing. + checkOneTerm(a, "egles", "egl"); // acc. pl. + checkOneTerm(a, "eglē", "egl"); // loc. sing. + checkOneTerm(a, "eglēs", "egl"); // loc. pl. + checkOneTerm(a, "egle", "egl"); // voc. sing. + checkOneTerm(a, "egles", "egl"); // voc. pl. + } + + public void testNouns6() throws IOException { + // decl VI + + // no palatalization + checkOneTerm(a, "govs", "gov"); // nom. sing. + checkOneTerm(a, "govis", "gov"); // nom. pl. + checkOneTerm(a, "govs", "gov"); // gen. sing. + checkOneTerm(a, "govju", "gov"); // gen. pl. + checkOneTerm(a, "govij", "gov"); // dat. sing. 
+ checkOneTerm(a, "govīm", "gov"); // dat. pl. + checkOneTerm(a, "govi ", "gov"); // acc. sing. + checkOneTerm(a, "govis", "gov"); // acc. pl. + checkOneTerm(a, "govi ", "gov"); // inst. sing. + checkOneTerm(a, "govīm", "gov"); // inst. pl. + checkOneTerm(a, "govī", "gov"); // loc. sing. + checkOneTerm(a, "govīs", "gov"); // loc. pl. + checkOneTerm(a, "govs", "gov"); // voc. sing. + checkOneTerm(a, "govis", "gov"); // voc. pl. + } + + public void testAdjectives() throws IOException { + checkOneTerm(a, "zils", "zil"); // indef. nom. masc. sing. + checkOneTerm(a, "zilais", "zil"); // def. nom. masc. sing. + checkOneTerm(a, "zili", "zil"); // indef. nom. masc. pl. + checkOneTerm(a, "zilie", "zil"); // def. nom. masc. pl. + checkOneTerm(a, "zila", "zil"); // indef. nom. fem. sing. + checkOneTerm(a, "zilā", "zil"); // def. nom. fem. sing. + checkOneTerm(a, "zilas", "zil"); // indef. nom. fem. pl. + checkOneTerm(a, "zilās", "zil"); // def. nom. fem. pl. + checkOneTerm(a, "zila", "zil"); // indef. gen. masc. sing. + checkOneTerm(a, "zilā", "zil"); // def. gen. masc. sing. + checkOneTerm(a, "zilu", "zil"); // indef. gen. masc. pl. + checkOneTerm(a, "zilo", "zil"); // def. gen. masc. pl. + checkOneTerm(a, "zilas", "zil"); // indef. gen. fem. sing. + checkOneTerm(a, "zilās", "zil"); // def. gen. fem. sing. + checkOneTerm(a, "zilu", "zil"); // indef. gen. fem. pl. + checkOneTerm(a, "zilo", "zil"); // def. gen. fem. pl. + checkOneTerm(a, "zilam", "zil"); // indef. dat. masc. sing. + checkOneTerm(a, "zilajam", "zil"); // def. dat. masc. sing. + checkOneTerm(a, "ziliem", "zil"); // indef. dat. masc. pl. + checkOneTerm(a, "zilajiem", "zil"); // def. dat. masc. pl. + checkOneTerm(a, "zilai", "zil"); // indef. dat. fem. sing. + checkOneTerm(a, "zilajai", "zil"); // def. dat. fem. sing. + checkOneTerm(a, "zilām", "zil"); // indef. dat. fem. pl. + checkOneTerm(a, "zilajām", "zil"); // def. dat. fem. pl. + checkOneTerm(a, "zilu", "zil"); // indef. acc. masc. sing. + checkOneTerm(a, "zilo", "zil"); // def. acc. masc. sing. + checkOneTerm(a, "zilus", "zil"); // indef. acc. masc. pl. + checkOneTerm(a, "zilos", "zil"); // def. acc. masc. pl. + checkOneTerm(a, "zilu", "zil"); // indef. acc. fem. sing. + checkOneTerm(a, "zilo", "zil"); // def. acc. fem. sing. + checkOneTerm(a, "zilās", "zil"); // indef. acc. fem. pl. + checkOneTerm(a, "zilās", "zil"); // def. acc. fem. pl. + checkOneTerm(a, "zilā", "zil"); // indef. loc. masc. sing. + checkOneTerm(a, "zilajā", "zil"); // def. loc. masc. sing. + checkOneTerm(a, "zilos", "zil"); // indef. loc. masc. pl. + checkOneTerm(a, "zilajos", "zil"); // def. loc. masc. pl. + checkOneTerm(a, "zilā", "zil"); // indef. loc. fem. sing. + checkOneTerm(a, "zilajā", "zil"); // def. loc. fem. sing. + checkOneTerm(a, "zilās", "zil"); // indef. loc. fem. pl. + checkOneTerm(a, "zilajās", "zil"); // def. loc. fem. pl. + checkOneTerm(a, "zilais", "zil"); // voc. masc. sing. + checkOneTerm(a, "zilie", "zil"); // voc. masc. pl. + checkOneTerm(a, "zilā", "zil"); // voc. fem. sing. + checkOneTerm(a, "zilās", "zil"); // voc. fem. pl. + } + + /** + * Note: we intentionally don't handle the ambiguous + * (s,t) -> š and (d,z) -> ž + */ + public void testPalatalization() throws IOException { + checkOneTerm(a, "krāsns", "krāsn"); // nom. sing. + checkOneTerm(a, "krāšņu", "krāsn"); // gen. pl. + checkOneTerm(a, "zvaigzne", "zvaigzn"); // nom. sing. + checkOneTerm(a, "zvaigžņu", "zvaigzn"); // gen. pl. + checkOneTerm(a, "kāpslis", "kāpsl"); // nom. sing. + checkOneTerm(a, "kāpšļu", "kāpsl"); // gen. 
pl. + checkOneTerm(a, "zizlis", "zizl"); // nom. sing. + checkOneTerm(a, "zižļu", "zizl"); // gen. pl. + checkOneTerm(a, "vilnis", "viln"); // nom. sing. + checkOneTerm(a, "viļņu", "viln"); // gen. pl. + checkOneTerm(a, "lelle", "lell"); // nom. sing. + checkOneTerm(a, "leļļu", "lell"); // gen. pl. + checkOneTerm(a, "pinne", "pinn"); // nom. sing. + checkOneTerm(a, "piņņu", "pinn"); // gen. pl. + checkOneTerm(a, "rīkste", "rīkst"); // nom. sing. + checkOneTerm(a, "rīkšu", "rīkst"); // gen. pl. + } + + /** + * Test some length restrictions, we require a 3+ char stem, + * with at least one vowel. + */ + public void testLength() throws IOException { + checkOneTerm(a, "usa", "usa"); // length + checkOneTerm(a, "60ms", "60ms"); // vowel count + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java index 3f6c3ead770..98f687edb3d 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java @@ -51,7 +51,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000))); + TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(random), 100000))); Document doc = new Document(); StringBuilder b = new StringBuilder(); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java index 1c2f72763ee..7477893cfd8 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java @@ -185,4 +185,9 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected); } + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } + } \ No newline at end of file diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java index ebcb607f983..9990fdac95c 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java @@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "havnedistriktene", "havnedistriktene"); checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java index 
35befb76c8b..9453cb21656 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java @@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "quilométricas", "quilométricas"); checkOneTermReuse(a, "quilométricos", "quilométr"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java index 6f5fdcf148d..bd5405b034b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java @@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java index 64a2dd7ac51..27fda5e244b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java @@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java index ee7c6eee4db..4cf797a008e 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java @@ -66,4 +66,9 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java index 44e3424499f..ee6f94b0404 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java +++ 
b/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java @@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "absenţa", "absenţa"); checkOneTermReuse(a, "absenţi", "absenţ"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java index b52ec1bd781..b0534e816c0 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java @@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java index b524d2a62a4..cee4eb7ada0 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java @@ -45,4 +45,9 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java index d64ad1dbc6c..493da3abca9 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java @@ -23,6 +23,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the @@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne"); checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java index b6b825e0c8a..db71d1b58af 100644 --- 
a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java @@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase { public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index 01864f533f1..6247bbf97be 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th; * limitations under the License. */ +import java.io.StringReader; + import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.util.Version; /** @@ -142,5 +146,23 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); - } + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } + + // LUCENE-3044 + public void testAttributeReuse() throws Exception { + assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); + ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); + // just consume + TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + // this consumer adds flagsAtt, which this analyzer does not use. 
+ ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + ts.addAttribute(FlagsAttribute.class); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java index cf8fed9a451..4b9587a3810 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java @@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "ağacı", "ağacı"); checkOneTermReuse(a, "ağaç", "ağaç"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java index b78815397b1..442788a389c 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java +++ b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java @@ -186,7 +186,7 @@ public abstract class CollationTestBase extends LuceneTestCase { String dkResult) throws Exception { RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); // document data: // the tracer field is used to determine which document was hit diff --git a/modules/analysis/icu/build.xml b/modules/analysis/icu/build.xml index c6aea1459cf..91823d75195 100644 --- a/modules/analysis/icu/build.xml +++ b/modules/analysis/icu/build.xml @@ -137,4 +137,20 @@ are part of the ICU4C package. 
See http://site.icu-project.org/ + + + + + + + + + + + + + + diff --git a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java index 4bf654b41a2..bc7a74d828c 100644 --- a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java +++ b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java @@ -29,15 +29,14 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer; * Tests ICUFoldingFilter */ public class TestICUFoldingFilter extends BaseTokenStreamTestCase { + Analyzer a = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new ICUFoldingFilter( + new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); + } + }; public void testDefaults() throws IOException { - Analyzer a = new Analyzer() { - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new ICUFoldingFilter( - new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); - } - }; - // case folding assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" }); @@ -76,4 +75,9 @@ public class TestICUFoldingFilter extends BaseTokenStreamTestCase { // handling of decomposed combining-dot-above assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java index 796627013a0..1a503cdd95f 100644 --- a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java +++ b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java @@ -31,16 +31,15 @@ import com.ibm.icu.text.Normalizer2; * Tests the ICUNormalizer2Filter */ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase { + Analyzer a = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new ICUNormalizer2Filter( + new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); + } + }; public void testDefaults() throws IOException { - Analyzer a = new Analyzer() { - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new ICUNormalizer2Filter( - new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); - } - }; - // case folding assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" }); @@ -75,4 +74,9 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase { // decompose EAcute into E + combining Acute assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java index 29733ce75e6..a56b22dad34 100644 --- a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java +++ b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java @@ 
-18,10 +18,15 @@ package org.apache.lucene.analysis.icu; */ import java.io.IOException; +import java.io.Reader; import java.io.StringReader; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.TokenStream; import com.ibm.icu.text.Transliterator; @@ -83,4 +88,17 @@ public class TestICUTransformFilter extends BaseTokenStreamTestCase { TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform); assertTokenStreamContents(ts, new String[] { expected }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + final Transliterator transform = Transliterator.getInstance("Any-Latin"); + Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform)); + } + }; + checkRandomData(random, a, 1000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index ccef95e2504..82afd63896c 100644 --- a/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -232,4 +232,9 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { new String[] { "仮", "名", "遣", "い", "カタカナ" }, new String[] { "", "", "", "", "" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java index 5434fe4aa73..f7b40d54c5b 100644 --- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java +++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java @@ -75,7 +75,7 @@ class SegGraph { List result = new ArrayList(); int s = -1, count = 0, size = tokenListTable.size(); List tokenList; - short index = 0; + int index = 0; while (count < size) { if (isStartExist(s)) { tokenList = tokenListTable.get(s); diff --git a/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java b/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java index 01b2d94c894..3f7ad7794f3 100644 --- a/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java +++ b/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java @@ -17,8 +17,11 @@ package org.apache.lucene.analysis.cn.smart; +import java.io.StringReader; + import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; 
import org.apache.lucene.util.Version; public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { @@ -166,4 +169,35 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { new int[] { 0, 1, 3, 4, 6, 7 }, new int[] { 1, 3, 4, 6, 7, 9 }); } + + // LUCENE-3026 + public void testLargeDocument() throws Exception { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 5000; i++) { + sb.append("我购买了道具和服装。"); + } + Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); + TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString())); + stream.reset(); + while (stream.incrementToken()) { + } + } + + // LUCENE-3026 + public void testLargeSentence() throws Exception { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 5000; i++) { + sb.append("我购买了道具和服装"); + } + Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); + TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString())); + stream.reset(); + while (stream.incrementToken()) { + } + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java index fe83796d1c3..6cb7c7de3bd 100644 --- a/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java +++ b/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java @@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "studenta", "studenta"); checkOneTermReuse(a, "studenci", "student"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new PolishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt index 00b6a5134c6..cf27bd978b2 100644 --- a/modules/benchmark/CHANGES.txt +++ b/modules/benchmark/CHANGES.txt @@ -2,6 +2,22 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +03/31/2011 + Updated ReadTask to the new method for obtaining a top-level deleted docs + bitset. Also checking the bitset for null, when there are no deleted docs. + (Steve Rowe, Mike McCandless) + + Updated NewAnalyzerTask and NewShingleAnalyzerTask to handle analyzers + in the new org.apache.lucene.analysis.core package (KeywordAnalyzer, + SimpleAnalyzer, etc.) (Steve Rowe, Robert Muir) + + Updated ReadTokensTask to convert tokens to their indexed forms + (char[]->byte[]), just as the indexer does. This allows measurement + of the conversion process, which is important for analysis components + that customize it, e.g. (ICU)CollationKeyFilter. As a result, + benchmarks that incorporate this task will no longer be directly + comparable between 3.X and 4.0. (Robert Muir, Steve Rowe) + 03/24/2011 LUCENE-2977: WriteLineDocTask now automatically detects how to write - GZip or BZip2 or Plain-text - according to the output file extension. 
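The testRandomStrings additions throughout this patch all follow one pattern: wrap the component under test in a throwaway ReusableAnalyzerBase (or use the language analyzer directly) and push random text through it with checkRandomData. Below is a minimal sketch of that pattern, reusing only classes that already appear in this patch (LatvianStemFilter, WhitespaceTokenizer, BaseTokenStreamTestCase); the class name is hypothetical and the filter could be swapped for any other filter being tested.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.lv.LatvianStemFilter;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

/** Hypothetical example (not part of the patch): the recurring random-data smoke test. */
public class TestRandomStringsSketch extends BaseTokenStreamTestCase {
  // Build the analyzer once and reuse it across test methods, as the patch does.
  private final Analyzer a = new ReusableAnalyzerBase() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
      return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
    }
  };

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    checkRandomData(random, a, 10000 * RANDOM_MULTIPLIER);
  }
}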
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java index 216cdebd7c7..ce19ecd94e6 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java @@ -1,136 +1,136 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; - -/** - * Parser for trec doc content, invoked on doc text excluding and - * which are handled in TrecContentSource. Required to be stateless and hence thread safe. - */ -public abstract class TrecDocParser { - - /** Types of trec parse paths, */ - public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES } - - /** trec parser type used for unknown extensions */ - public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2; - - static final Map pathType2parser = new HashMap(); - static { - pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser()); - pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser()); - pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser()); - pathType2parser.put(ParsePathType.FT, new TrecFTParser()); - pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser()); - } - - static final Map pathName2Type = new HashMap(); - static { - for (ParsePathType ppt : ParsePathType.values()) { - pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt); - } - } - - /** max length of walk up from file to its ancestors when looking for a known path type */ - private static final int MAX_PATH_LENGTH = 10; - - /** - * Compute the path type of a file by inspecting name of file and its parents - */ - public static ParsePathType pathType(File f) { - int pathLength = 0; - while (f != null && ++pathLength < MAX_PATH_LENGTH) { - ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH)); - if (ppt!=null) { - return ppt; - } - f = f.getParentFile(); - } - return DEFAULT_PATH_TYPE; - } - - /** - * parse the text prepared in docBuf into a result DocData, - * no synchronization is required. - * @param docData reusable result - * @param name name that should be set to the result - * @param trecSrc calling trec content source - * @param docBuf text to parse - * @param pathType type of parsed file, or null if unknown - may be used by - * parsers to alter their behavior according to the file path type. 
- */ - public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException; - - /** - * strip tags from buf: each tag is replaced by a single blank. - * @return text obtained when stripping all tags from buf (Input StringBuilder is unmodified). - */ - public static String stripTags(StringBuilder buf, int start) { - return stripTags(buf.substring(start),0); - } - - /** - * strip tags from input. - * @see #stripTags(StringBuilder, int) - */ - public static String stripTags(String buf, int start) { - if (start>0) { - buf = buf.substring(0); - } - return buf.replaceAll("<[^>]*>", " "); - } - - /** - * Extract from buf the text of interest within specified tags - * @param buf entire input text - * @param startTag tag marking start of text of interest - * @param endTag tag marking end of text of interest - * @param maxPos if ≥ 0 sets a limit on start of text of interest - * @return text of interest or null if not found - */ - public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) { - int k1 = buf.indexOf(startTag); - if (k1>=0 && (maxPos<0 || k1=0 && (maxPos<0 || k2=0 && k1a2<>1?",0)); - //} - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * Parser for trec doc content, invoked on doc text excluding and + * which are handled in TrecContentSource. Required to be stateless and hence thread safe. 
+ */ +public abstract class TrecDocParser { + + /** Types of trec parse paths, */ + public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES } + + /** trec parser type used for unknown extensions */ + public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2; + + static final Map pathType2parser = new HashMap(); + static { + pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser()); + pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser()); + pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser()); + pathType2parser.put(ParsePathType.FT, new TrecFTParser()); + pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser()); + } + + static final Map pathName2Type = new HashMap(); + static { + for (ParsePathType ppt : ParsePathType.values()) { + pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt); + } + } + + /** max length of walk up from file to its ancestors when looking for a known path type */ + private static final int MAX_PATH_LENGTH = 10; + + /** + * Compute the path type of a file by inspecting name of file and its parents + */ + public static ParsePathType pathType(File f) { + int pathLength = 0; + while (f != null && ++pathLength < MAX_PATH_LENGTH) { + ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH)); + if (ppt!=null) { + return ppt; + } + f = f.getParentFile(); + } + return DEFAULT_PATH_TYPE; + } + + /** + * parse the text prepared in docBuf into a result DocData, + * no synchronization is required. + * @param docData reusable result + * @param name name that should be set to the result + * @param trecSrc calling trec content source + * @param docBuf text to parse + * @param pathType type of parsed file, or null if unknown - may be used by + * parsers to alter their behavior according to the file path type. + */ + public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException; + + /** + * strip tags from buf: each tag is replaced by a single blank. + * @return text obtained when stripping all tags from buf (Input StringBuilder is unmodified). + */ + public static String stripTags(StringBuilder buf, int start) { + return stripTags(buf.substring(start),0); + } + + /** + * strip tags from input. 
+ * @see #stripTags(StringBuilder, int) + */ + public static String stripTags(String buf, int start) { + if (start>0) { + buf = buf.substring(0); + } + return buf.replaceAll("<[^>]*>", " "); + } + + /** + * Extract from buf the text of interest within specified tags + * @param buf entire input text + * @param startTag tag marking start of text of interest + * @param endTag tag marking end of text of interest + * @param maxPos if ≥ 0 sets a limit on start of text of interest + * @return text of interest or null if not found + */ + public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) { + int k1 = buf.indexOf(startTag); + if (k1>=0 && (maxPos<0 || k1=0 && (maxPos<0 || k2=0 && k1a2<>1?",0)); + //} + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java index 8efcd04e91d..d85c5f35e5c 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java @@ -1,65 +1,65 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Date; - -/** - * Parser for the FBIS docs in trec disks 4+5 collection format - */ -public class TrecFBISParser extends TrecDocParser { - - private static final String HEADER = "
"; - private static final String HEADER_END = "
"; - private static final int HEADER_END_LENGTH = HEADER_END.length(); - - private static final String DATE1 = ""; - private static final String DATE1_END = ""; - - private static final String TI = ""; - private static final String TI_END = ""; - - @Override - public DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { - int mark = 0; // that much is skipped - // optionally skip some of the text, set date, title - Date date = null; - String title = null; - int h1 = docBuf.indexOf(HEADER); - if (h1>=0) { - int h2 = docBuf.indexOf(HEADER_END,h1); - mark = h2+HEADER_END_LENGTH; - // date... - String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null); - if (dateStr != null) { - date = trecSrc.parseDate(dateStr); - } - // title... - title = extract(docBuf, TI, TI_END, h2, null); - } - docData.clear(); - docData.setName(name); - docData.setDate(date); - docData.setTitle(title); - docData.setBody(stripTags(docBuf, mark).toString()); - return docData; - } - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FBIS docs in trec disks 4+5 collection format + */ +public class TrecFBISParser extends TrecDocParser { + + private static final String HEADER = "
"; + private static final String HEADER_END = "
"; + private static final int HEADER_END_LENGTH = HEADER_END.length(); + + private static final String DATE1 = ""; + private static final String DATE1_END = ""; + + private static final String TI = ""; + private static final String TI_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date, title + Date date = null; + String title = null; + int h1 = docBuf.indexOf(HEADER); + if (h1>=0) { + int h2 = docBuf.indexOf(HEADER_END,h1); + mark = h2+HEADER_END_LENGTH; + // date... + String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null); + if (dateStr != null) { + date = trecSrc.parseDate(dateStr); + } + // title... + title = extract(docBuf, TI, TI_END, h2, null); + } + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java index ce6492120d7..a1298252215 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java @@ -1,66 +1,66 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Date; - -/** - * Parser for the FR94 docs in trec disks 4+5 collection format - */ -public class TrecFR94Parser extends TrecDocParser { - - private static final String TEXT = ""; - private static final int TEXT_LENGTH = TEXT.length(); - private static final String TEXT_END = ""; - - private static final String DATE = ""; - private static final String[] DATE_NOISE_PREFIXES = { - "DATE:", - "date:", //TODO improve date extraction for this format - "t.c.", - }; - private static final String DATE_END = ""; - - //TODO can we also extract title for this format? - - @Override - public DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { - int mark = 0; // that much is skipped - // optionally skip some of the text, set date (no title?) - Date date = null; - int h1 = docBuf.indexOf(TEXT); - if (h1>=0) { - int h2 = docBuf.indexOf(TEXT_END,h1); - mark = h1+TEXT_LENGTH; - // date... 
- String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES); - if (dateStr != null) { - dateStr = stripTags(dateStr,0).toString(); - date = trecSrc.parseDate(dateStr.trim()); - } - } - docData.clear(); - docData.setName(name); - docData.setDate(date); - docData.setBody(stripTags(docBuf, mark).toString()); - return docData; - } - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FR94 docs in trec disks 4+5 collection format + */ +public class TrecFR94Parser extends TrecDocParser { + + private static final String TEXT = ""; + private static final int TEXT_LENGTH = TEXT.length(); + private static final String TEXT_END = ""; + + private static final String DATE = ""; + private static final String[] DATE_NOISE_PREFIXES = { + "DATE:", + "date:", //TODO improve date extraction for this format + "t.c.", + }; + private static final String DATE_END = ""; + + //TODO can we also extract title for this format? + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date (no title?) + Date date = null; + int h1 = docBuf.indexOf(TEXT); + if (h1>=0) { + int h2 = docBuf.indexOf(TEXT_END,h1); + mark = h1+TEXT_LENGTH; + // date... + String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES); + if (dateStr != null) { + dateStr = stripTags(dateStr,0).toString(); + date = trecSrc.parseDate(dateStr.trim()); + } + } + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java index ab39d9c2860..4965338cdd0 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java @@ -1,57 +1,57 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Date; - -/** - * Parser for the FT docs in trec disks 4+5 collection format - */ -public class TrecFTParser extends TrecDocParser { - - private static final String DATE = ""; - private static final String DATE_END = ""; - - private static final String HEADLINE = ""; - private static final String HEADLINE_END = ""; - - @Override - public DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { - int mark = 0; // that much is skipped - - // date... - Date date = null; - String dateStr = extract(docBuf, DATE, DATE_END, -1, null); - if (dateStr != null) { - date = trecSrc.parseDate(dateStr); - } - - // title... - String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); - - docData.clear(); - docData.setName(name); - docData.setDate(date); - docData.setTitle(title); - docData.setBody(stripTags(docBuf, mark).toString()); - return docData; - } - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FT docs in trec disks 4+5 collection format + */ +public class TrecFTParser extends TrecDocParser { + + private static final String DATE = ""; + private static final String DATE_END = ""; + + private static final String HEADLINE = ""; + private static final String HEADLINE_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + + // date... + Date date = null; + String dateStr = extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) { + date = trecSrc.parseDate(dateStr); + } + + // title... 
+ String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java index 367015bee36..6d7243f950a 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java @@ -1,71 +1,71 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Date; - -/** - * Parser for the FT docs in trec disks 4+5 collection format - */ -public class TrecLATimesParser extends TrecDocParser { - - private static final String DATE = ""; - private static final String DATE_END = ""; - private static final String DATE_NOISE = "day,"; // anything aftre the ',' - - private static final String SUBJECT = ""; - private static final String SUBJECT_END = ""; - private static final String HEADLINE = ""; - private static final String HEADLINE_END = ""; - - @Override - public DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { - int mark = 0; // that much is skipped - - // date... - Date date = null; - String dateStr = extract(docBuf, DATE, DATE_END, -1, null); - if (dateStr != null) { - int d2a = dateStr.indexOf(DATE_NOISE); - if (d2a > 0) { - dateStr = dateStr.substring(0,d2a+3); // we need the "day" part - } - dateStr = stripTags(dateStr,0).toString(); - date = trecSrc.parseDate(dateStr.trim()); - } - - // title... first try with SUBJECT, them with HEADLINE - String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null); - if (title==null) { - title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); - } - if (title!=null) { - title = stripTags(title,0).toString().trim(); - } - - docData.clear(); - docData.setName(name); - docData.setDate(date); - docData.setTitle(title); - docData.setBody(stripTags(docBuf, mark).toString()); - return docData; - } - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FT docs in trec disks 4+5 collection format + */ +public class TrecLATimesParser extends TrecDocParser { + + private static final String DATE = ""; + private static final String DATE_END = ""; + private static final String DATE_NOISE = "day,"; // anything aftre the ',' + + private static final String SUBJECT = ""; + private static final String SUBJECT_END = ""; + private static final String HEADLINE = ""; + private static final String HEADLINE_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + + // date... + Date date = null; + String dateStr = extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) { + int d2a = dateStr.indexOf(DATE_NOISE); + if (d2a > 0) { + dateStr = dateStr.substring(0,d2a+3); // we need the "day" part + } + dateStr = stripTags(dateStr,0).toString(); + date = trecSrc.parseDate(dateStr.trim()); + } + + // title... first try with SUBJECT, them with HEADLINE + String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null); + if (title==null) { + title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + } + if (title!=null) { + title = stripTags(title,0).toString().trim(); + } + + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java index fc882035a01..503b2d6cb1e 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java @@ -1,33 +1,33 @@ -package org.apache.lucene.benchmark.byTask.feeds; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -/** - * Parser for trec docs which selects the parser to apply according - * to the source files path, defaulting to {@link TrecGov2Parser}. - */ -public class TrecParserByPath extends TrecDocParser { - - @Override - public DocData parse(DocData docData, String name, TrecContentSource trecSrc, - StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { - return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType); - } - -} +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +/** + * Parser for trec docs which selects the parser to apply according + * to the source files path, defaulting to {@link TrecGov2Parser}. + */ +public class TrecParserByPath extends TrecDocParser { + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType); + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index 5a8f0ddbb9d..217818ce257 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.MergePolicy; @@ -150,6 +151,9 @@ public class CreateIndexTask extends PerfTask { LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy(); logMergePolicy.setUseCompoundFile(isCompound); logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR)); + } else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) { + TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy(); + tieredMergePolicy.setUseCompoundFile(isCompound); } } final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java index 6d301142229..8ce123e160f 100644 --- 
a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java @@ -20,9 +20,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.util.Version; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.StringTokenizer; +import java.util.*; import java.lang.reflect.Constructor; /** @@ -54,20 +52,32 @@ public class NewAnalyzerTask extends PerfTask { public int doLogic() throws IOException { String className = null; try { - if (current >= analyzerClassNames.size()) - { + if (current >= analyzerClassNames.size()) { current = 0; } className = analyzerClassNames.get(current++); - if (className == null || className.equals("")) - { - className = "org.apache.lucene.analysis.standard.StandardAnalyzer"; + Analyzer analyzer = null; + if (null == className || 0 == className.length()) { + className = "org.apache.lucene.analysis.standard.StandardAnalyzer"; } - if (className.indexOf(".") == -1 || className.startsWith("standard."))//there is no package name, assume o.a.l.analysis - { - className = "org.apache.lucene.analysis." + className; + if (-1 == className.indexOf(".")) { + try { + // If no package, first attempt to instantiate a core analyzer + String coreClassName = "org.apache.lucene.analysis.core." + className; + analyzer = createAnalyzer(coreClassName); + className = coreClassName; + } catch (ClassNotFoundException e) { + // If not a core analyzer, try the base analysis package + className = "org.apache.lucene.analysis." + className; + analyzer = createAnalyzer(className); + } + } else { + if (className.startsWith("standard.")) { + className = "org.apache.lucene.analysis." + className; + } + analyzer = createAnalyzer(className); } - getRunData().setAnalyzer(createAnalyzer(className)); + getRunData().setAnalyzer(analyzer); System.out.println("Changed Analyzer to: " + className); } catch (Exception e) { throw new RuntimeException("Error creating Analyzer: " + className, e); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java index 2dd29ece722..519512e566e 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java @@ -24,6 +24,7 @@ import java.util.StringTokenizer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.util.Version; /** * Task to support benchmarking collation. 
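For orientation on the NewAnalyzerTask / NewShingleAnalyzerTask hunks above: an analyzer named without a package is now resolved by trying the analyzers-common "core" package first and then falling back to the historical base package. A rough sketch only (it assumes same-package access to the existing NewAnalyzerTask.createAnalyzer(String) helper and an org.apache.lucene.analysis.Analyzer import; the method name resolveByShortName is invented for illustration):

    static Analyzer resolveByShortName(String shortName) throws Exception {
      try {
        // 1st attempt: e.g. "WhitespaceAnalyzer" -> org.apache.lucene.analysis.core.WhitespaceAnalyzer
        return NewAnalyzerTask.createAnalyzer("org.apache.lucene.analysis.core." + shortName);
      } catch (ClassNotFoundException e) {
        // 2nd attempt: fall back to org.apache.lucene.analysis.<shortName>
        return NewAnalyzerTask.createAnalyzer("org.apache.lucene.analysis." + shortName);
      }
    }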
@@ -65,8 +66,8 @@ public class NewCollationAnalyzerTask extends PerfTask { final Class clazz = Class.forName(impl.className) .asSubclass(Analyzer.class); - Constructor ctor = clazz.getConstructor(collatorClazz); - return ctor.newInstance(collator); + Constructor ctor = clazz.getConstructor(Version.class, collatorClazz); + return ctor.newInstance(Version.LUCENE_CURRENT, collator); } @Override diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java index 27b805c41a7..a4af36ae12d 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java @@ -17,13 +17,11 @@ package org.apache.lucene.benchmark.byTask.tasks; * limitations under the License. */ -import java.lang.reflect.Constructor; import java.util.StringTokenizer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper; import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.util.Version; /** * Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper @@ -45,26 +43,26 @@ public class NewShingleAnalyzerTask extends PerfTask { } private void setAnalyzer() throws Exception { - Class clazz = null; - Analyzer wrappedAnalyzer; - try { - if (analyzerClassName == null || analyzerClassName.equals("")) { - analyzerClassName - = "org.apache.lucene.analysis.standard.StandardAnalyzer"; + Analyzer wrappedAnalyzer = null; + if (null == analyzerClassName || 0 == analyzerClassName.length()) { + analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer"; + } + if (-1 == analyzerClassName.indexOf(".")) { + String coreClassName = "org.apache.lucene.analysis.core." + analyzerClassName; + try { + // If there is no package, first attempt to instantiate a core analyzer + wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(coreClassName); + analyzerClassName = coreClassName; + } catch (ClassNotFoundException e) { + // If this is not a core analyzer, try the base analysis package + analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName; + wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName); } - if (analyzerClassName.indexOf(".") == -1 - || analyzerClassName.startsWith("standard.")) { - //there is no package name, assume o.a.l.analysis + } else { + if (analyzerClassName.startsWith("standard.")) { analyzerClassName = "org.apache.lucene.analysis." 
+ analyzerClassName; } - clazz = Class.forName(analyzerClassName).asSubclass(Analyzer.class); - // first try to use a ctor with version parameter (needed for many new - // Analyzers that have no default one anymore) - Constructor ctor = clazz.getConstructor(Version.class); - wrappedAnalyzer = ctor.newInstance(Version.LUCENE_CURRENT); - } catch (NoSuchMethodException e) { - // otherwise use default ctor - wrappedAnalyzer = clazz.newInstance(); + wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName); } ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(wrappedAnalyzer, maxShingleSize); @@ -77,7 +75,7 @@ public class NewShingleAnalyzerTask extends PerfTask { try { setAnalyzer(); System.out.println - ("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over" + ("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over " + analyzerClassName); } catch (Exception e) { throw new RuntimeException("Error creating Analyzer", e); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java index 414cf23c4c6..8c30924d2ce 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java @@ -30,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Collector; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.MultiTermQuery; @@ -95,9 +96,9 @@ public abstract class ReadTask extends PerfTask { // optionally warm and add num docs traversed to count if (withWarm()) { Document doc = null; - Bits delDocs = reader.getDeletedDocs(); + Bits delDocs = MultiFields.getDeletedDocs(reader); for (int m = 0; m < reader.maxDoc(); m++) { - if (!delDocs.get(m)) { + if (null == delDocs || ! delDocs.get(m)) { doc = reader.document(m); res += (doc == null ? 
0 : 1); } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java index fa0ae999b45..f702cb8ee7f 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.document.Document; @@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask { // reset the TokenStream to the first token stream.reset(); - while(stream.incrementToken()) + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + while(stream.incrementToken()) { + termAtt.fillBytesRef(); tokenCount++; + } } totalTokenCount += tokenCount; return tokenCount; diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java index 197fe45c153..90cb4a3ee20 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java @@ -43,7 +43,8 @@ import org.apache.lucene.document.Field; *
<p>
* The format of the output is set according to the output file extension. * Compression is recommended when the output file is expected to be large. - * See info on file extensions in {@link StreamUtils.Type} + * See info on file extensions in + * {@link org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type} *
<p>
* Supports the following parameters: *
<ul>
    diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 346c557db3a..d9ca9517e80 100755 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker; @@ -96,7 +96,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); @@ -183,7 +183,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. - IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs()); @@ -222,7 +222,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. - IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs()); @@ -295,7 +295,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches); assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. 
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs()); @@ -407,7 +407,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { // Index the line docs String algLines2[] = { "# ----- properties ", - "analyzer=org.apache.lucene.analysis.MockAnalyzer", + "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer", "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'), "content.source.forever=false", @@ -425,7 +425,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { // now we should be able to open the index for write. IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), - new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setOpenMode(OpenMode.APPEND)); iw.close(); @@ -448,7 +448,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { // then build index from the same docs String algLines1[] = { "# ----- properties ", - "analyzer=org.apache.lucene.analysis.MockAnalyzer", + "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer", "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", "docs.file=" + getReuters20LinesFile(), "# ----- alg ", @@ -934,24 +934,24 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { public void testCollator() throws Exception { // ROOT locale Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk")); - CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator + CollationKeyAnalyzer expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator .getInstance(new Locale(""))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify just a language benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk")); - expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de"))); + expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("de"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify language + country benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk")); - expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en", + expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("en", "US"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify language + country + variant benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk")); - expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no", + expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("no", "NO", "NY"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); } @@ -962,11 +962,15 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text)); ts1.reset(); ts2.reset(); - CharTermAttribute termAtt1 = 
ts1.addAttribute(CharTermAttribute.class); - CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class); + TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class); + TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class); assertTrue(ts1.incrementToken()); assertTrue(ts2.incrementToken()); - assertEquals(termAtt1.toString(), termAtt2.toString()); + BytesRef bytes1 = termAtt1.getBytesRef(); + BytesRef bytes2 = termAtt2.getBytesRef(); + termAtt1.fillBytesRef(); + termAtt2.fillBytesRef(); + assertEquals(bytes1, bytes2); assertFalse(ts1.incrementToken()); assertFalse(ts2.incrementToken()); ts1.close(); @@ -1017,18 +1021,18 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { "two three four", "three four", "three four five", "four five", "four five six", "five six" }); - // MockAnalyzer, default maxShingleSize and outputUnigrams + // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams benchmark = execBenchmark - (getShingleConfig("analyzer:MockAnalyzer")); + (getShingleConfig("analyzer:WhitespaceAnalyzer")); assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, new String[] { "one,two,three,", "one,two,three, four", "four", "four five", "five", "five six", "six" }); - // MockAnalyzer, maxShingleSize=3 and outputUnigrams=false + // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false benchmark = execBenchmark (getShingleConfig - ("outputUnigrams:false,maxShingleSize:3,analyzer:MockAnalyzer")); + ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer")); assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, new String[] { "one,two,three, four", "one,two,three, four five", diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java index 5ee7b13cd3a..eb0ed42c09c 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Properties; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.tasks.AddDocTask; @@ -70,7 +71,7 @@ public class DocMakerTest extends BenchmarkTestCase { Properties props = new Properties(); // Indexing configuration. - props.setProperty("analyzer", MockAnalyzer.class.getName()); + props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("content.source", OneDocSource.class.getName()); props.setProperty("directory", "RAMDirectory"); if (setIndexProps) { @@ -99,7 +100,7 @@ public class DocMakerTest extends BenchmarkTestCase { Properties props = new Properties(); // Indexing configuration. 
- props.setProperty("analyzer", MockAnalyzer.class.getName()); + props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("content.source", OneDocSource.class.getName()); props.setProperty("directory", "RAMDirectory"); if (setNormsProp) { diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java index 7cc7dc0da2d..e42f1c45817 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java @@ -27,6 +27,7 @@ import java.util.Properties; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser; @@ -121,7 +122,7 @@ public class LineDocSourceTest extends BenchmarkTestCase { } // Indexing configuration. - props.setProperty("analyzer", MockAnalyzer.class.getName()); + props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("content.source", LineDocSource.class.getName()); props.setProperty("directory", "RAMDirectory"); props.setProperty("doc.stored", "true"); diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java index a38583ff69c..617b4264800 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java @@ -1,37 +1,37 @@ -package org.apache.lucene.benchmark.byTask.tasks.alt; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.benchmark.byTask.tasks.PerfTask; - -/** - * {@link PerfTask} which does nothing, but is in a different package - */ -public class AltTestTask extends PerfTask { - - public AltTestTask(PerfRunData runData) { - super(runData); - } - - @Override - public int doLogic() throws Exception { - return 0; - } - -} +package org.apache.lucene.benchmark.byTask.tasks.alt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.tasks.PerfTask; + +/** + * {@link PerfTask} which does nothing, but is in a different package + */ +public class AltTestTask extends PerfTask { + + public AltTestTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws Exception { + return 0; + } + +} diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 21c28d78961..0ed4698c902 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -60,6 +60,11 @@ Detailed Change List New Features ---------------------- + +* SOLR-2378: A new, automaton-based, implementation of suggest (autocomplete) + component, offering an order of magnitude smaller memory consumption + compared to ternary trees and jaspell and very fast lookups at runtime. + (Dawid Weiss) * SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now supports "percentages" which get evaluated relative the current size of @@ -75,7 +80,7 @@ New Features * SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing. (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, - Koji Sekiguchi, Iv�n de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, + Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald, Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, Harish Agarwal, yonik) @@ -100,8 +105,10 @@ New Features levenshtein automata. (rmuir) * SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper, - built-in load balancing, and infrastructure for future SolrCloud work. - (yonik, Mark Miller) + built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller) + Additional Work: + SOLR-2324: SolrCloud solr.xml parameters are not persisted by CoreContainer. + (Massimo Schiavon, Mark Miller) * SOLR-1729: Evaluation of NOW for date math is done only once per request for consistency, and is also propagated to shards in distributed search. @@ -110,10 +117,34 @@ New Features * SOLR-1566: Transforming documents in the ResponseWriters. This will allow for more complex results in responses and open the door for function queries - as results. (ryan with patches from grant, noble, cmale, yonik) + as results. (ryan with patches from grant, noble, cmale, yonik, Jan Høydahl) * SOLR-2417: Add explain info directly to return documents using ?fl=_explain_ (ryan) + +* SOLR-2396: Add CollationField, which is much more efficient than + the Solr 3.x CollationKeyFilterFactory, and also supports + Locale-sensitive range queries. (rmuir) + +* SOLR-2338: Add support for using in a schema's fieldType, + for customizing scoring on a per-field basis. 
(hossman, yonik, rmuir) +* SOLR-2335: New 'field("...")' function syntax for refering to complex + field names (containing whitespace or special characters) in functions. + +* SOLR-1709: Distributed support for Date and Numeric Range Faceting + (Peter Sturge, David Smiley, hossman) + +* SOLR-2383: /browse improvements: generalize range and date facet display + (Jan Høydahl via yonik) + +* SOLR-2272: Pseudo-join queries / filters. Examples: + To restrict to the set of parents with at least one blue-eyed child: + fq={!join from=parent to=name}eyes:blue + To restrict to the set of children with at least one blue-eyed parent: + fq={!join from=name to=parent}eyes:blue + (yonik) + + Optimizations ---------------------- @@ -199,7 +230,13 @@ Other Changes * SOLR-2423: FieldType argument changed from String to Object Conversion from SolrInputDocument > Object > Fieldable is now managed by FieldType rather then DocumentBuilder. (ryan) + +* SOLR-2061: Pull base tests out into a new Solr Test Framework module, + and publish binary, javadoc, and source test-framework jars. + (Drew Farris, Robert Muir, Steve Rowe) +* SOLR-2461: QuerySenderListener and AbstractSolrEventListener are + now public (hossman) Documentation ---------------------- @@ -207,6 +244,8 @@ Documentation * SOLR-2232: Improved README info on solr.solr.home in examples (Eric Pugh and hossman) +* LUCENE-3006: Building javadocs will fail on warnings by default. Override with -Dfailonjavadocwarning=false (sarowe, gsingers) + ================== 3.2.0-dev ================== Versions of Major Components --------------------- @@ -214,26 +253,95 @@ Apache Lucene trunk Apache Tika 0.8 Carrot2 3.4.2 -(No Changes) -================== 3.1.0-dev ================== +Upgrading from Solr 3.1 +---------------------- + +* The updateRequestProcessorChain for a RequestHandler is now defined + with update.chain rather than update.processor. The latter still works, + but has been deprecated. + +Detailed Change List +---------------------- + +New Features +---------------------- + +Optimizations +---------------------- + +Bug Fixes +---------------------- + +* SOLR-2445: Change the default qt to blank in form.jsp, because there is no "standard" + request handler unless you have it in your solrconfig.xml explicitly. (koji) + +* SOLR-2455: Prevent double submit of forms in admin interface. + (Jeffrey Chang via uschindler) + +* SOLR-2464: Fix potential slowness in QueryValueSource (the query() function) when + the query is very sparse and may not match any documents in a segment. (yonik) + +* SOLR-2469: When using java replication with replicateAfter=startup, the first + commit point on server startup is never removed. (yonik) + +* SOLR-2466: SolrJ's CommonsHttpSolrServer would retry requests on failure, regardless + of the configured maxRetries, due to HttpClient having it's own retry mechanism + by default. The retryCount of HttpClient is now set to 0, and SolrJ does + the retry. (yonik) + +* SOLR-2409: edismax parser - treat the text of a fielded query as a literal if the + fieldname does not exist. For example Mission: Impossible should not search on + the "Mission" field unless it's a valid field in the schema. (Ryan McKinley, yonik) + +* SOLR-2403: facet.sort=index reported incorrect results for distributed search + in a number of scenarios when facet.mincount>0. 
This patch also adds some + performance/algorithmic improvements when (facet.sort=count && facet.mincount=1 + && facet.limit=-1) and when (facet.sort=index && facet.mincount>0) (yonik) + +* SOLR-2333: The "rename" core admin action does not persist the new name to solr.xml + (Rasmus Hahn, Paul R. Brown via Mark Miller) + +* SOLR-2390: Performance of usePhraseHighlighter is terrible on very large Documents, + regardless of hl.maxDocCharsToAnalyze. (Mark Miller) + +* SOLR-2474: The helper TokenStreams in analysis.jsp and AnalysisRequestHandlerBase + did not clear all attributes so they displayed incorrect attribute values for tokens + in later filter stages. (uschindler, rmuir, yonik) + +Other Changes +---------------------- + +* SOLR-2105: Rename RequestHandler param 'update.processor' to 'update.chain'. + (Jan Høydahl via Mark Miller) + +Build +---------------------- + +Documentation +---------------------- + + +================== 3.1.0 ================== Versions of Major Components --------------------- -Apache Lucene trunk +Apache Lucene 3.1.0 Apache Tika 0.8 Carrot2 3.4.2 +Velocity 1.6.1 and Velocity Tools 2.0-beta3 +Apache UIMA 2.3.1-SNAPSHOT Upgrading from Solr 1.4 ---------------------- -* The Lucene index format has changed and as a result, once you upgrade, +* The Lucene index format has changed and as a result, once you upgrade, previous versions of Solr will no longer be able to read your indices. In a master/slave configuration, all searchers/slaves should be upgraded before the master. If the master were to be updated first, the older searchers would not be able to read the new index format. -* The Solr JavaBin format has changed as of Solr 3.1. If you are using the +* The Solr JavaBin format has changed as of Solr 3.1. If you are using the JavaBin format, you will need to upgrade your SolrJ client. (SOLR-2034) * The experimental ALIAS command has been removed (SOLR-1637) @@ -244,10 +352,10 @@ Upgrading from Solr 1.4 is deprecated (SOLR-1696) * The deprecated HTMLStripReader, HTMLStripWhitespaceTokenizerFactory and - HTMLStripStandardTokenizerFactory were removed. To strip HTML tags, - HTMLStripCharFilter should be used instead, and it works with any + HTMLStripStandardTokenizerFactory were removed. To strip HTML tags, + HTMLStripCharFilter should be used instead, and it works with any Tokenizer of your choice. (SOLR-1657) - + * Field compression is no longer supported. Fields that were formerly compressed will be uncompressed as index segments are merged. For shorter fields, this may actually be an improvement, as the compression @@ -256,24 +364,24 @@ Upgrading from Solr 1.4 * SOLR-1845: The TermsComponent response format was changed so that the "terms" container is a map instead of a named list. This affects response formats like JSON, but not XML. (yonik) - + * SOLR-1876: All Analyzers and TokenStreams are now final to enforce the decorator pattern. (rmuir, uschindler) -* LUCENE-2608: Added the ability to specify the accuracy on a per request basis. +* LUCENE-2608: Added the ability to specify the accuracy on a per request basis. It is recommended that implementations of SolrSpellChecker should change over to the new SolrSpellChecker methods using the new SpellingOptions class, but are not required to. While this change is backward compatible, the trunk version of Solr has already dropped support for all but the SpellingOptions method. (gsingers) * readercycle script was removed. 
(SOLR-2046) -* In previous releases, sorting or evaluating function queries on +* In previous releases, sorting or evaluating function queries on fields that were "multiValued" (either by explicit declaration in schema.xml or by implict behavior because the "version" attribute on the schema was less then 1.2) did not generally work, but it would sometimes silently act as if it succeeded and order the docs arbitrarily. Solr will now fail on any attempt to sort, or apply a - function to, multi-valued fields + function to, multi-valued fields * The DataImportHandler jars are no longer included in the solr WAR and should be added in Solr's lib directory, or referenced @@ -343,13 +451,13 @@ New Features * SOLR-1379: Add RAMDirectoryFactory for non-persistent in memory index storage. (Alex Baranov via yonik) -* SOLR-1857: Synced Solr analysis with Lucene 3.1. Added KeywordMarkerFilterFactory - and StemmerOverrideFilterFactory, which can be used to tune stemming algorithms. +* SOLR-1857: Synced Solr analysis with Lucene 3.1. Added KeywordMarkerFilterFactory + and StemmerOverrideFilterFactory, which can be used to tune stemming algorithms. Added factories for Bulgarian, Czech, Hindi, Turkish, and Wikipedia analysis. Improved the performance of SnowballPorterFilterFactory. (rmuir) -* SOLR-1657: Converted remaining TokenStreams to the Attributes-based API. All Solr - TokenFilters now support custom Attributes, and some have improved performance: +* SOLR-1657: Converted remaining TokenStreams to the Attributes-based API. All Solr + TokenFilters now support custom Attributes, and some have improved performance: especially WordDelimiterFilter and CommonGramsFilter. (rmuir, cmale, uschindler) * SOLR-1740: ShingleFilterFactory supports the "minShingleSize" and "tokenSeparator" @@ -358,10 +466,10 @@ New Features * SOLR-744: ShingleFilterFactory supports the "outputUnigramsIfNoShingles" parameter, to output unigrams if the number of input tokens is fewer than - minShingleSize, and no shingles can be generated. + minShingleSize, and no shingles can be generated. (Chris Harris via Steven Rowe) -* SOLR-1923: PhoneticFilterFactory now has support for the +* SOLR-1923: PhoneticFilterFactory now has support for the Caverphone algorithm. (rmuir) * SOLR-1957: The VelocityResponseWriter contrib moved to core. @@ -429,7 +537,7 @@ New Features (Ankul Garg, Jason Rutherglen, Shalin Shekhar Mangar, Grant Ingersoll, Robert Muir, ab) * SOLR-1568: Added "native" filtering support for PointType, GeohashField. Added LatLonType with filtering support too. See - http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial. + http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial. Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers) * SOLR-2128: Full parameter substitution for function queries. @@ -484,7 +592,7 @@ Optimizations Bug Fixes ---------------------- -* SOLR-1769: Solr 1.4 Replication - Repeater throwing NullPointerException (Jörgen Rydenius via noble) +* SOLR-1769: Solr 1.4 Replication - Repeater throwing NullPointerException (Jörgen Rydenius via noble) * SOLR-1432: Make the new ValueSource.getValues(context,reader) delegate to the original ValueSource.getValues(reader) so custom sources @@ -507,8 +615,8 @@ Bug Fixes * SOLR-1584: SolrJ - SolrQuery.setIncludeScore() incorrectly added fl=score to the parameter list instead of appending score to the existing field list. 
(yonik) - -* SOLR-1580: Solr Configuration ignores 'mergeFactor' parameter, always + +* SOLR-1580: Solr Configuration ignores 'mergeFactor' parameter, always uses Lucene default. (Lance Norskog via Mark Miller) * SOLR-1593: ReverseWildcardFilter didn't work for surrogate pairs @@ -525,7 +633,7 @@ Bug Fixes set when streaming updates, rather than using UTF-8 as the HTTP headers indicated, leading to an encoding mismatch. (hossman, yonik) -* SOLR-1587: A distributed search request with fl=score, didn't match +* SOLR-1587: A distributed search request with fl=score, didn't match the behavior of a non-distributed request since it only returned the id,score fields instead of all fields in addition to score. (yonik) @@ -534,7 +642,7 @@ Bug Fixes * SOLR-1615: Backslash escaping did not work in quoted strings for local param arguments. (Wojtek Piaseczny, yonik) -* SOLR-1628: log contains incorrect number of adds and deletes. +* SOLR-1628: log contains incorrect number of adds and deletes. (Thijs Vonk via yonik) * SOLR-343: Date faceting now respects facet.mincount limiting @@ -562,7 +670,7 @@ Bug Fixes (never officially released) introduced another hanging bug due to connections not being released. (Attila Babo, Erik Hetzner, Johannes Tuchscherer via yonik) - + * SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers retrieved from ContentStreams are not closed in various places, resulting in file descriptor leaks. @@ -571,7 +679,7 @@ Bug Fixes * SOLR-1753: StatsComponent throws NPE when getting statistics for facets in distributed search (Janne Majaranta via koji) -* SOLR-1736:In the slave , If 'mov'ing file does not succeed , copy the file (noble) +* SOLR-1736:In the slave , If 'mov'ing file does not succeed , copy the file (noble) * SOLR-1579: Fixes to XML escaping in stats.jsp (David Bowen and hossman) @@ -625,7 +733,7 @@ Bug Fixes * SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji) -* SOLR-1630: Fix spell checking collation issue related to token positions (rmuir, gsingers) +* SOLR-1630: Fix spell checking collation issue related to token positions (rmuir, gsingers) * SOLR-2100: The replication handler backup command didn't save the commit point and hence could fail when a newer commit caused the older commit point @@ -634,7 +742,7 @@ Bug Fixes * SOLR-2114: Fixed parsing error in hsin function. The function signature has changed slightly. (gsingers) -* SOLR-2083: SpellCheckComponent misreports suggestions when distributed (James Dyer via gsingers) +* SOLR-2083: SpellCheckComponent misreports suggestions when distributed (James Dyer via gsingers) * SOLR-2111: Change exception handling in distributed faceting to work more like non-distributed faceting, change facet_counts/exception from a String @@ -658,9 +766,9 @@ Bug Fixes * SOLR-2173: Suggester should always rebuild Lookup data if Lookup.load fails. (ab) * SOLR-2081: BaseResponseWriter.isStreamingDocs causes - SingleResponseWriter.end to be called 2x - (Chris A. Mattmann via hossman) - + SingleResponseWriter.end to be called 2x + (Chris A. Mattmann via hossman) + * SOLR-2219: The init() method of every SolrRequestHandler was being called twice. (ambikeshwar singh and hossman) @@ -685,7 +793,7 @@ Bug Fixes * SOLR-482: Provide more exception handling in CSVLoader (gsingers) -* SOLR-1283: HTMLStripCharFilter sometimes threw a "Mark Invalid" exception. +* SOLR-1283: HTMLStripCharFilter sometimes threw a "Mark Invalid" exception. 
(Julien Coloos, hossman, yonik) * SOLR-2085: Improve SolrJ behavior when FacetComponent comes before @@ -712,21 +820,29 @@ Bug Fixes * SOLR-2380: Distributed faceting could miss values when facet.sort=index and when facet.offset was greater than 0. (yonik) - + * SOLR-1656: XIncludes and other HREFs in XML files loaded by ResourceLoader - are fixed to be resolved using the URI standard (RFC 2396). The system - identifier is no longer a plain filename with path, it gets initialized - using a custom URI scheme "solrres:". This scheme is resolved using a - EntityResolver that utilizes ResourceLoader - (org.apache.solr.common.util.SystemIdResolver). This makes all relative - pathes in Solr's config files behave like expected. This change - introduces some backwards breaks in the API: Some config classes - (Config, SolrConfig, IndexSchema) were changed to take - org.xml.sax.InputSource instead of InputStream. There may also be some - backwards breaks in existing config files, it is recommended to check - your config files / XSLTs and replace all XIncludes/HREFs that were + are fixed to be resolved using the URI standard (RFC 2396). The system + identifier is no longer a plain filename with path, it gets initialized + using a custom URI scheme "solrres:". This scheme is resolved using a + EntityResolver that utilizes ResourceLoader + (org.apache.solr.common.util.SystemIdResolver). This makes all relative + pathes in Solr's config files behave like expected. This change + introduces some backwards breaks in the API: Some config classes + (Config, SolrConfig, IndexSchema) were changed to take + org.xml.sax.InputSource instead of InputStream. There may also be some + backwards breaks in existing config files, it is recommended to check + your config files / XSLTs and replace all XIncludes/HREFs that were hacked to use absolute paths to use relative ones. (uschindler) +* SOLR-309: Fix FieldType so setting an analyzer on a FieldType that + doesn't expect it will generate an error. Practically speaking this + means that Solr will now correctly generate an error on + initialization if the schema.xml contains an analyzer configuration + for a fieldType that does not use TextField. (hossman) + +* SOLR-2192: StreamingUpdateSolrServer.blockUntilFinished was not + thread safe and could throw an exception. 
(yonik) Other Changes ---------------------- diff --git a/solr/build.xml b/solr/build.xml index c19ab981fff..8c68ca3464e 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -120,7 +120,7 @@ + depends="compile-solrj"> @@ -176,6 +176,24 @@ + + + + + + + + + + + + + + + @@ -187,6 +205,7 @@ + @@ -220,8 +239,9 @@ failonerror="true"/> - + + @@ -318,13 +338,18 @@ - + + + + + + @@ -335,10 +360,19 @@ + + + + + + + + depends="compileTestFramework"> + depends="validate-solr, test-core, test-contrib, test-jsp" /> @@ -575,7 +609,7 @@ + depends="dist-solrj, dist-jar, dist-test-framework, dist-contrib, dist-war" /> + + + + + + + + + + + + @@ -934,14 +984,14 @@ keyfile="${keyfile}" verbose="true" > - + - @@ -1052,6 +1102,14 @@ + + + + + + + diff --git a/solr/common-build.xml b/solr/common-build.xml index 36ca6f0194d..861ff237062 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -330,6 +330,7 @@ + @@ -339,6 +340,7 @@ + + + + + + + + + + + + + + + + + + + @@ -414,6 +434,8 @@ + + @@ -421,10 +443,13 @@ Signing @{input.file} Sig File: @{output.file} - + + + + diff --git a/solr/contrib/analysis-extras/CHANGES.txt b/solr/contrib/analysis-extras/CHANGES.txt index 6f26f9f6683..72053f89d8b 100644 --- a/solr/contrib/analysis-extras/CHANGES.txt +++ b/solr/contrib/analysis-extras/CHANGES.txt @@ -13,7 +13,9 @@ analyzers for Chinese and Polish. $Id$ ================== Release 4.0-dev ================== -(No Changes) +* SOLR-2396: Add ICUCollationField, which is much more efficient than + the Solr 3.x ICUCollationKeyFilterFactory, and also supports + Locale-sensitive range queries. (rmuir) ================== Release 3.2-dev ================== diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml index 1b135e3c4af..9cc5aa217bc 100644 --- a/solr/contrib/analysis-extras/build.xml +++ b/solr/contrib/analysis-extras/build.xml @@ -72,7 +72,7 @@ - + @@ -92,7 +92,7 @@ - + diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUCollationKeyFilterFactory.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUCollationKeyFilterFactory.java index 1a79de899f6..e88a8f0128e 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUCollationKeyFilterFactory.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/ICUCollationKeyFilterFactory.java @@ -57,7 +57,9 @@ import com.ibm.icu.util.ULocale; * @see Collator * @see ULocale * @see RuleBasedCollator + * @deprecated use {@link org.apache.solr.schema.ICUCollationField} instead. */ +@Deprecated public class ICUCollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { private Collator collator; diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java new file mode 100644 index 00000000000..dbbbfc08aaa --- /dev/null +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -0,0 +1,228 @@ +package org.apache.solr.schema; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.util.Map; + +import org.apache.commons.io.IOUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.collation.ICUCollationKeyAnalyzer; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; +import org.apache.solr.common.ResourceLoader; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RuleBasedCollator; +import com.ibm.icu.util.ULocale; + +/** + * Field for collated sort keys. + * These can be used for locale-sensitive sort and range queries. + *

+ * <p>
+ * This field can be created in two ways:
+ * <ul>
+ *  <li>Based upon a system collator associated with a Locale.</li>
+ *  <li>Based upon a tailored ruleset.</li>
+ * </ul>
+ * <p>
+ * Using a System collator:
+ * <ul>
+ *  <li>locale: RFC 3066 locale ID (mandatory)</li>
+ *  <li>strength: 'primary', 'secondary', 'tertiary', 'quaternary', or 'identical' (optional)</li>
+ *  <li>decomposition: 'no', or 'canonical' (optional)</li>
+ * </ul>
+ * <p>
+ * Using a Tailored ruleset:
+ * <ul>
+ *  <li>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</li>
+ *  <li>strength: 'primary', 'secondary', 'tertiary', 'quaternary', or 'identical' (optional)</li>
+ *  <li>decomposition: 'no' or 'canonical' (optional)</li>
+ * </ul>
    + * + * @see Collator + * @see ULocale + * @see RuleBasedCollator + */ +public class ICUCollationField extends FieldType { + private Analyzer analyzer; + + @Override + protected void init(IndexSchema schema, Map args) { + properties |= TOKENIZED; // this ensures our analyzer gets hit + setup(schema.getResourceLoader(), args); + super.init(schema, args); + } + + /** + * Setup the field according to the provided parameters + */ + private void setup(ResourceLoader loader, Map args) { + String custom = args.remove("custom"); + String localeID = args.remove("locale"); + String strength = args.remove("strength"); + String decomposition = args.remove("decomposition"); + + if (custom == null && localeID == null) + throw new SolrException(ErrorCode.SERVER_ERROR, "Either custom or locale is required."); + + if (custom != null && localeID != null) + throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot specify both locale and custom. " + + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. " + + "Then save the entire customized ruleset to a file, and use with the custom parameter"); + + final Collator collator; + + if (localeID != null) { + // create from a system collator, based on Locale. + collator = createFromLocale(localeID); + } else { + // create from a custom ruleset + collator = createFromRules(custom, loader); + } + + // set the strength flag, otherwise it will be the default. + if (strength != null) { + if (strength.equalsIgnoreCase("primary")) + collator.setStrength(Collator.PRIMARY); + else if (strength.equalsIgnoreCase("secondary")) + collator.setStrength(Collator.SECONDARY); + else if (strength.equalsIgnoreCase("tertiary")) + collator.setStrength(Collator.TERTIARY); + else if (strength.equalsIgnoreCase("quaternary")) + collator.setStrength(Collator.QUATERNARY); + else if (strength.equalsIgnoreCase("identical")) + collator.setStrength(Collator.IDENTICAL); + else + throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid strength: " + strength); + } + + // set the decomposition flag, otherwise it will be the default. + if (decomposition != null) { + if (decomposition.equalsIgnoreCase("no")) + collator.setDecomposition(Collator.NO_DECOMPOSITION); + else if (decomposition.equalsIgnoreCase("canonical")) + collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); + else + throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition); + } + // we use 4.0 because it ensures we just encode the pure byte[] keys. + analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator); + } + + /** + * Create a locale from localeID. + * Then return the appropriate collator for the locale. + */ + private Collator createFromLocale(String localeID) { + return Collator.getInstance(new ULocale(localeID)); + } + + /** + * Read custom rules from a file, and create a RuleBasedCollator + * The file cannot support comments, as # might be in the rules! 
+ */ + private Collator createFromRules(String fileName, ResourceLoader loader) { + InputStream input = null; + try { + input = loader.openResource(fileName); + String rules = IOUtils.toString(input, "UTF-8"); + return new RuleBasedCollator(rules); + } catch (Exception e) { + // io error or invalid rules + throw new RuntimeException(e); + } finally { + IOUtils.closeQuietly(input); + } + } + + @Override + public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + writer.writeStr(name, f.stringValue(), true); + } + + @Override + public SortField getSortField(SchemaField field, boolean top) { + return getStringSort(field, top); + } + + @Override + public Analyzer getAnalyzer() { + return analyzer; + } + + @Override + public Analyzer getQueryAnalyzer() { + return analyzer; + } + + /** + * analyze the range with the analyzer, instead of the collator. + * because icu collators are not thread safe, this keeps things + * simple (we already have a threadlocal clone in the reused TS) + */ + private BytesRef analyzeRangePart(String field, String part) { + TokenStream source; + + try { + source = analyzer.reusableTokenStream(field, new StringReader(part)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(part)); + } + + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); + + // we control the analyzer here: most errors are impossible + try { + if (!source.incrementToken()) + throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + termAtt.fillBytesRef(); + assert !source.incrementToken(); + } catch (IOException e) { + throw new RuntimeException("error analyzing range part: " + part, e); + } + + try { + source.close(); + } catch (IOException ignored) {} + + return new BytesRef(bytes); + } + + @Override + public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + String f = field.getName(); + BytesRef low = part1 == null ? null : analyzeRangePart(f, part1); + BytesRef high = part2 == null ? null : analyzeRangePart(f, part2); + return new TermRangeQuery(field.getName(), low, high, minInclusive, maxInclusive); + } +} diff --git a/solr/contrib/analysis-extras/src/test-files/empty b/solr/contrib/analysis-extras/src/test-files/empty deleted file mode 100644 index ad1d7bb4e6a..00000000000 --- a/solr/contrib/analysis-extras/src/test-files/empty +++ /dev/null @@ -1,4 +0,0 @@ -Please don't remove this silly file! - -This is here to make sure the dir is not empty... otherwise Ht/git -clones have problems. 
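The new field type delegates ordering entirely to an ICU4J Collator and indexes the resulting binary sort keys, which is why the term, range, and sort assertions in TestICUCollationField (added further down in this patch) follow locale rules rather than binary byte order. As a rough standalone sketch of that behaviour, the snippet below drives the same ICU4J API directly, outside Solr. The class name CollationSketch is made up for illustration, and the PRIMARY strength settings are assumptions chosen to mirror what the bundled test schema is expected to configure; only the DIN 5007-2 rule string is taken verbatim from the test.

    import com.ibm.icu.text.Collator;
    import com.ibm.icu.text.RuleBasedCollator;
    import com.ibm.icu.util.ULocale;

    public class CollationSketch {   // hypothetical name, not part of this patch
      public static void main(String[] args) throws Exception {
        // German system collator at primary strength: case and accent differences
        // are ignored (DIN 5007-1 behaviour), so "Töne" and "tone" compare equal.
        Collator de = Collator.getInstance(new ULocale("de", "DE"));
        de.setStrength(Collator.PRIMARY);
        System.out.println(de.compare("Töne", "tone"));        // 0 == equal

        // Tailored ruleset (DIN 5007-2 "phone book" order): ö sorts with "oe".
        // Same rule string TestICUCollationField writes to customrules.dat.
        RuleBasedCollator base = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
        String din5007_2 =
            "& ae , a\u0308 & AE , A\u0308" +
            "& oe , o\u0308 & OE , O\u0308" +
            "& ue , u\u0308 & UE , u\u0308";
        RuleBasedCollator custom = new RuleBasedCollator(base.getRules() + din5007_2);
        custom.setStrength(Collator.PRIMARY);                  // assumed to match the test schema
        System.out.println(custom.compare("Töne", "toene"));   // 0 == equal

        // ICUCollationField stores the binary collation key of each value, so
        // sorting and range queries on the field reduce to byte-wise comparisons.
        byte[] key = de.getCollationKey("Töne").toByteArray();
        System.out.println(key.length);
      }
    }

Because the indexed terms are collation keys rather than the original text, the getRangeQuery() implementation above only needs to analyze the two range endpoints with the same analyzer and hand the resulting BytesRefs to an ordinary TermRangeQuery.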
diff --git a/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/schema-icucollate.xml b/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/schema-icucollate.xml new file mode 100644 index 00000000000..3ec19c65175 --- /dev/null +++ b/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/schema-icucollate.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + diff --git a/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/solrconfig-icucollate.xml b/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/solrconfig-icucollate.xml new file mode 100644 index 00000000000..2c9b55c1a6a --- /dev/null +++ b/solr/contrib/analysis-extras/src/test-files/solr-analysis-extras/conf/solrconfig-icucollate.xml @@ -0,0 +1,23 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestICUCollationKeyFilterFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestICUCollationKeyFilterFactory.java index 44c42f6f2f6..e99105276e3 100644 --- a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestICUCollationKeyFilterFactory.java +++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestICUCollationKeyFilterFactory.java @@ -34,6 +34,7 @@ import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.util.ULocale; +@Deprecated public class TestICUCollationKeyFilterFactory extends BaseTokenTestCase { /* diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java new file mode 100644 index 00000000000..ddf9d0f4f95 --- /dev/null +++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import java.io.File; +import java.io.FileOutputStream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RuleBasedCollator; +import com.ibm.icu.util.ULocale; + +/** + * Tests {@link ICUCollationField} with TermQueries, RangeQueries, and sort order. 
+ */ +public class TestICUCollationField extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec())); + String home = setupSolrHome(); + initCore("solrconfig.xml","schema.xml", home); + // add some docs + assertU(adoc("id", "1", "text", "\u0633\u0627\u0628")); + assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING")); + assertU(adoc("id", "3", "text", "ı will use turkish casıng")); + assertU(adoc("id", "4", "text", "Töne")); + assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING")); + assertU(adoc("id", "6", "text", "Testing")); + assertU(adoc("id", "7", "text", "Tone")); + assertU(adoc("id", "8", "text", "Testing")); + assertU(adoc("id", "9", "text", "testing")); + assertU(adoc("id", "10", "text", "toene")); + assertU(adoc("id", "11", "text", "Tzne")); + assertU(adoc("id", "12", "text", "\u0698\u0698")); + assertU(commit()); + } + + /** + * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource. + * These are largish files, and jvm-specific (as our documentation says, you should always + * look out for jvm differences with collation). + * So its preferable to create this file on-the-fly. + */ + public static String setupSolrHome() throws Exception { + // make a solr home underneath the test's TEMP_DIR + File tmpFile = File.createTempFile("test", "tmp", TEMP_DIR); + tmpFile.delete(); + tmpFile.mkdir(); + + // make data and conf dirs + new File(tmpFile, "data").mkdir(); + File confDir = new File(tmpFile, "conf"); + confDir.mkdir(); + + // copy over configuration files + FileUtils.copyFile(getFile("solr-analysis-extras/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml")); + FileUtils.copyFile(getFile("solr-analysis-extras/conf/schema-icucollate.xml"), new File(confDir, "schema.xml")); + + // generate custom collation rules (DIN 5007-2), saving to customrules.dat + RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE")); + + String DIN5007_2_tailorings = + "& ae , a\u0308 & AE , A\u0308"+ + "& oe , o\u0308 & OE , O\u0308"+ + "& ue , u\u0308 & UE , u\u0308"; + + RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings); + String tailoredRules = tailoredCollator.getRules(); + FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat")); + IOUtils.write(tailoredRules, os, "UTF-8"); + os.close(); + + return tmpFile.getAbsolutePath(); + } + + /** + * Test termquery with german DIN 5007-1 primary strength. + * In this case, ö is equivalent to o (but not oe) + */ + public void testBasicTermQuery() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=7]" + ); + } + + /** + * Test rangequery again with the DIN 5007-1 collator. + * We do a range query of tone .. tp, in binary order this + * would retrieve nothing due to case and accent differences. + */ + public void testBasicRangeQuery() { + assertQ("Collated RangeQ: ", + req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=7]" + ); + } + + /** + * Test sort with a danish collator. 
ö is ordered after z + */ + public void testBasicSort() { + assertQ("Collated Sort: ", + req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=11]", + "//result/doc[2]/int[@name='id'][.=4]" + ); + } + + /** + * Test sort with an arabic collator. U+0633 is ordered after U+0698. + * With a binary collator, the range would also return nothing. + */ + public void testArabicSort() { + assertQ("Collated Sort: ", + req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=12]", + "//result/doc[2]/int[@name='id'][.=1]" + ); + } + + /** + * Test rangequery again with an Arabic collator. + * Binary order would normally order U+0633 in this range. + */ + public void testNegativeRangeQuery() { + assertQ("Collated RangeQ: ", + req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ), + "//*[@numFound='0']" + ); + } + /** + * Test canonical decomposition with turkish primary strength. + * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı. + * We index a decomposed form of İ. + */ + public void testCanonicalDecomposition() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ), + "//*[@numFound='3']", + "//result/doc[1]/int[@name='id'][.=2]", + "//result/doc[2]/int[@name='id'][.=3]", + "//result/doc[3]/int[@name='id'][.=5]" + ); + } + + /** + * Test termquery with custom collator (DIN 5007-2). + * In this case, ö is equivalent to oe (but not o) + */ + public void testCustomCollation() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_custom:toene", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=10]" + ); + } +} diff --git a/solr/contrib/clustering/build.xml b/solr/contrib/clustering/build.xml index 7090ca710bc..aee297e3b8f 100644 --- a/solr/contrib/clustering/build.xml +++ b/solr/contrib/clustering/build.xml @@ -41,8 +41,8 @@ - - + + @@ -64,7 +64,7 @@ - + diff --git a/solr/contrib/dataimporthandler/build.xml b/solr/contrib/dataimporthandler/build.xml index 5b9ddc18dec..bd6ea50a2e2 100644 --- a/solr/contrib/dataimporthandler/build.xml +++ b/solr/contrib/dataimporthandler/build.xml @@ -55,7 +55,7 @@ - + @@ -67,7 +67,7 @@ - + @@ -89,7 +89,7 @@ - + diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java index e4ba7fbd91c..83e546d0c32 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java @@ -194,7 +194,7 @@ public class DataImportHandler extends RequestHandlerBase implements IMPORT_CMD.equals(command)) { UpdateRequestProcessorChain processorChain = - req.getCore().getUpdateProcessingChain(params.get(UpdateParams.UPDATE_PROCESSOR)); + req.getCore().getUpdateProcessingChain(params.get(UpdateParams.UPDATE_CHAIN)); UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp); SolrResourceLoader loader = req.getCore().getResourceLoader(); SolrWriter sw = getSolrWriter(processor, loader, requestParams, req); diff --git a/solr/contrib/extraction/build.xml b/solr/contrib/extraction/build.xml index de7542d54b4..01aa60e7485 
100644 --- a/solr/contrib/extraction/build.xml +++ b/solr/contrib/extraction/build.xml @@ -39,7 +39,7 @@ - + @@ -51,7 +51,7 @@ - + diff --git a/solr/contrib/uima/CHANGES.txt b/solr/contrib/uima/CHANGES.txt index 8b588c97cd7..a31054a05b5 100644 --- a/solr/contrib/uima/CHANGES.txt +++ b/solr/contrib/uima/CHANGES.txt @@ -21,11 +21,25 @@ $Id$ ================== 3.2.0-dev ================== +Upgrading from Solr 3.1 +---------------------- + +* just beneath ... is no longer supported. + It should move to UIMAUpdateRequestProcessorFactory setting. + See contrib/uima/README.txt for more details. (SOLR-2436) + Test Cases: +---------------------- - * SOLR-2387: add mock annotators for improved testing, - (Tommaso Teofili via rmuir) +* SOLR-2387: add mock annotators for improved testing, + (Tommaso Teofili via rmuir) -================== 3.1.0-dev ================== +Other Changes +---------------------- + +* SOLR-2436: move uimaConfig to under the uima's update processor in solrconfig.xml. + (Tommaso Teofili, koji) + +================== 3.1.0 ================== Initial Release diff --git a/solr/contrib/uima/README.txt b/solr/contrib/uima/README.txt index e9a03eec648..a8ef9cd5598 100644 --- a/solr/contrib/uima/README.txt +++ b/solr/contrib/uima/README.txt @@ -3,38 +3,61 @@ Getting Started To start using Solr UIMA Metadata Extraction Library you should go through the following configuration steps: 1. copy generated solr-uima jar and its libs (under contrib/uima/lib) inside a Solr libraries directory. + or set tags in solrconfig.xml appropriately to point those jar files. + + + 2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options: -3. for example you could specify the following + for example you could specify the following + -4. modify your solrconfig.xml adding the following snippet: - - - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_OPENCALAIS_KEY - - /org/apache/uima/desc/OverridingParamsExtServicesAE.xml - text - - - - - - - - - - - - +3. modify your solrconfig.xml adding the following snippet: + + + + + + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_OPENCALAIS_KEY + + /org/apache/uima/desc/OverridingParamsExtServicesAE.xml + + false + + text + + + + + org.apache.uima.alchemy.ts.concept.ConceptFS + text + concept + + + org.apache.uima.alchemy.ts.language.LanguageFS + language + language + + + org.apache.uima.SentenceAnnotation + coveredText + sentence + + + + + + + where VALID_ALCHEMYAPI_KEY is your AlchemyAPI Access Key. You need to register AlchemyAPI Access key to exploit the AlchemyAPI services: http://www.alchemyapi.com/api/register.html @@ -42,21 +65,14 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f where VALID_OPENCALAIS_KEY is your Calais Service Key. You need to register Calais Service key to exploit the Calais services: http://www.opencalais.com/apikey -5. the analysisEngine tag must contain an AE descriptor inside the specified path in the classpath + the analysisEngine must contain an AE descriptor inside the specified path in the classpath -6. the analyzeFields tag must contain the input fields that need to be analyzed by UIMA, + the analyzeFields must contain the input fields that need to be analyzed by UIMA, if merge=true then their content will be merged and analyzed only once -7. 
field mapping describes which features of which types should go in a field + field mapping describes which features of which types should go in a field -8. define in your solrconfig.xml an UpdateRequestProcessorChain as following: - - - - - - -9. in your solrconfig.xml replace the existing default ( uima diff --git a/solr/contrib/uima/build.xml b/solr/contrib/uima/build.xml index 34dbefec748..631f8a8c413 100644 --- a/solr/contrib/uima/build.xml +++ b/solr/contrib/uima/build.xml @@ -40,7 +40,7 @@ - + @@ -52,7 +52,7 @@ - + diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java index 62c145c5dc0..22357262ba3 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java @@ -21,7 +21,7 @@ import java.util.Map; /** * Configuration holding all the configurable parameters for calling UIMA inside Solr - * + * * @version $Id$ */ public class SolrUIMAConfiguration { @@ -65,5 +65,4 @@ public class SolrUIMAConfiguration { public Map getRuntimeParameters() { return runtimeParameters; } - } diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java index 3f2b01d7328..00e6aca3288 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java @@ -18,11 +18,10 @@ package org.apache.solr.uima.processor; */ import java.util.HashMap; +import java.util.List; import java.util.Map; -import org.apache.solr.core.SolrConfig; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; +import org.apache.solr.common.util.NamedList; /** * Read configuration for Solr-UIMA integration @@ -32,18 +31,10 @@ import org.w3c.dom.NodeList; */ public class SolrUIMAConfigurationReader { - private static final String AE_RUNTIME_PARAMETERS_NODE_PATH = "/config/uimaConfig/runtimeParameters"; + private NamedList args; - private static final String FIELD_MAPPING_NODE_PATH = "/config/uimaConfig/fieldMapping"; - - private static final String ANALYZE_FIELDS_NODE_PATH = "/config/uimaConfig/analyzeFields"; - - private static final String ANALYSIS_ENGINE_NODE_PATH = "/config/uimaConfig/analysisEngine"; - - private SolrConfig solrConfig; - - public SolrUIMAConfigurationReader(SolrConfig solrConfig) { - this.solrConfig = solrConfig; + public SolrUIMAConfigurationReader(NamedList args) { + this.args = args; } public SolrUIMAConfiguration readSolrUIMAConfiguration() { @@ -52,73 +43,51 @@ public class SolrUIMAConfigurationReader { } private String readAEPath() { - return solrConfig.getNode(ANALYSIS_ENGINE_NODE_PATH, true).getTextContent(); + return (String) args.get("analysisEngine"); } + @SuppressWarnings("rawtypes") + private NamedList getAnalyzeFields() { + return (NamedList) args.get("analyzeFields"); + } + + @SuppressWarnings("unchecked") private String[] readFieldsToAnalyze() { - Node analyzeFieldsNode = solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true); - return analyzeFieldsNode.getTextContent().split(","); + List fields = (List) getAnalyzeFields().get("fields"); + return fields.toArray(new String[fields.size()]); } private boolean readFieldsMerging() { - Node analyzeFieldsNode 
= solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true); - Node mergeNode = analyzeFieldsNode.getAttributes().getNamedItem("merge"); - return Boolean.valueOf(mergeNode.getNodeValue()); + return (Boolean) getAnalyzeFields().get("merge"); } + @SuppressWarnings("rawtypes") private Map> readTypesFeaturesFieldsMapping() { Map> map = new HashMap>(); - Node fieldMappingNode = solrConfig.getNode(FIELD_MAPPING_NODE_PATH, true); + NamedList fieldMappings = (NamedList) args.get("fieldMappings"); /* iterate over UIMA types */ - if (fieldMappingNode.hasChildNodes()) { - NodeList typeNodes = fieldMappingNode.getChildNodes(); - for (int i = 0; i < typeNodes.getLength(); i++) { - /* node */ - Node typeNode = typeNodes.item(i); - if (typeNode.getNodeType() != Node.TEXT_NODE) { - Node typeNameAttribute = typeNode.getAttributes().getNamedItem("name"); - /* get a UIMA typename */ - String typeName = typeNameAttribute.getNodeValue(); - /* create entry for UIMA type */ - map.put(typeName, new HashMap()); - if (typeNode.hasChildNodes()) { - /* iterate over features */ - NodeList featuresNodeList = typeNode.getChildNodes(); - for (int j = 0; j < featuresNodeList.getLength(); j++) { - Node mappingNode = featuresNodeList.item(j); - if (mappingNode.getNodeType() != Node.TEXT_NODE) { - /* get field name */ - Node fieldNameNode = mappingNode.getAttributes().getNamedItem("field"); - String mappedFieldName = fieldNameNode.getNodeValue(); - /* get feature name */ - Node featureNameNode = mappingNode.getAttributes().getNamedItem("feature"); - String featureName = featureNameNode.getNodeValue(); - /* map the feature to the field for the specified type */ - map.get(typeName).put(featureName, mappedFieldName); - } - } - } - } - } + for (int i = 0; i < fieldMappings.size(); i++) { + NamedList mapping = (NamedList) fieldMappings.get("mapping", i); + String typeName = (String) mapping.get("type"); + String featureName = (String) mapping.get("feature"); + String mappedFieldName = (String) mapping.get("field"); + Map subMap = new HashMap(); + subMap.put(featureName, mappedFieldName); + map.put(typeName, subMap); } return map; } + @SuppressWarnings("rawtypes") private Map readAEOverridingParameters() { Map runtimeParameters = new HashMap(); - Node uimaConfigNode = solrConfig.getNode(AE_RUNTIME_PARAMETERS_NODE_PATH, true); - - if (uimaConfigNode.hasChildNodes()) { - NodeList overridingNodes = uimaConfigNode.getChildNodes(); - for (int i = 0; i < overridingNodes.getLength(); i++) { - Node overridingNode = overridingNodes.item(i); - if (overridingNode.getNodeType() != Node.TEXT_NODE && overridingNode.getNodeType() != Node.COMMENT_NODE) { - runtimeParameters.put(overridingNode.getNodeName(), overridingNode.getTextContent()); - } - } + NamedList runtimeParams = (NamedList) args.get("runtimeParameters"); + for (int i = 0; i < runtimeParams.size(); i++) { + String name = runtimeParams.getName(i); + Object value = runtimeParams.getVal(i); + runtimeParameters.put(name, value); } - return runtimeParameters; } diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java index 4f7e004666c..8b3cb547d67 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -34,7 +34,7 @@ import org.apache.uima.resource.ResourceInitializationException; /** * Update 
document(s) to be indexed with UIMA extracted information - * + * * @version $Id$ */ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { @@ -43,15 +43,14 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { private AEProvider aeProvider; - public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore) { + public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore, + SolrUIMAConfiguration config) { super(next); - initialize(solrCore); + initialize(solrCore, config); } - private void initialize(SolrCore solrCore) { - SolrUIMAConfigurationReader uimaConfigurationReader = new SolrUIMAConfigurationReader(solrCore - .getSolrConfig()); - solrUIMAConfiguration = uimaConfigurationReader.readSolrUIMAConfiguration(); + private void initialize(SolrCore solrCore, SolrUIMAConfiguration config) { + solrUIMAConfiguration = config; aeProvider = AEProviderFactory.getInstance().getAEProvider(solrCore.getName(), solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters()); } diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java index b8167572195..9cb46018ffa 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java @@ -17,6 +17,7 @@ package org.apache.solr.uima.processor; * limitations under the License. */ +import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.processor.UpdateRequestProcessor; @@ -29,10 +30,19 @@ import org.apache.solr.update.processor.UpdateRequestProcessorFactory; */ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { + private NamedList args; + + @SuppressWarnings("unchecked") + @Override + public void init(@SuppressWarnings("rawtypes") NamedList args) { + this.args = (NamedList) args.get("uimaConfig"); + } + @Override public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { - return new UIMAUpdateRequestProcessor(next, req.getCore()); + return new UIMAUpdateRequestProcessor(next, req.getCore(), + new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration()); } } diff --git a/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml b/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml index 0e66585bf80..842b8e5cdd0 100644 --- a/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml +++ b/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml @@ -15,19 +15,34 @@ limitations under the License. 
--> - - - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_ALCHEMYAPI_KEY - VALID_OPENCALAIS_KEY - - /org/apache/uima/desc/OverridingParamsExtServicesAE.xml - text,title - - - - - - \ No newline at end of file + + + + + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_OPENCALAIS_KEY + + /org/apache/uima/desc/OverridingParamsExtServicesAE.xml + + false + + text + title + + + + + org.apache.uima.jcas.tcas.Annotation + convertText + tag + + + + + + + diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java index dcf2f0d7b01..392afcf1ffc 100644 --- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java +++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java @@ -108,7 +108,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { private void addDoc(String doc) throws Exception { Map params = new HashMap(); - params.put(UpdateParams.UPDATE_PROCESSOR, new String[] { "uima" }); + params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uima" }); MultiMapSolrParams mmparams = new MultiMapSolrParams(params); SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) { }; diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml b/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml index 817b910d80d..5ca6b8a335e 100644 --- a/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml @@ -295,7 +295,7 @@ @@ -855,7 +855,7 @@ --> - uima + uima @@ -953,42 +953,6 @@ - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - + + + + 3 + + /TestAE.xml + + false + + text + + + + + org.apache.uima.SentenceAnnotation + coveredText + sentence + + + org.apache.solr.uima.ts.SentimentAnnotation + mood + sentiment + + + org.apache.solr.uima.ts.EntityAnnotation + coveredText + entity + + + + - - - - - 3 - - /TestAE.xml - text - - - - - - - - - - - - - diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml index fbd206fcc27..0d72cb2d183 100755 --- a/solr/example/solr/conf/solrconfig.xml +++ b/solr/example/solr/conf/solrconfig.xml @@ -774,6 +774,7 @@ *:* 10 *,score + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 @@ -792,6 +793,8 @@ 0 600 50 + popularity + 3 after manufacturedate_dt NOW/YEAR-10YEARS @@ -841,7 +844,7 @@ --> diff --git a/solr/example/solr/conf/velocity/VM_global_library.vm b/solr/example/solr/conf/velocity/VM_global_library.vm index 9ec7e09a151..3d33b512055 100644 --- a/solr/example/solr/conf/velocity/VM_global_library.vm +++ b/solr/example/solr/conf/velocity/VM_global_library.vm @@ -88,13 +88,13 @@
      #foreach ($facet in $field) #set($theDate = $date.toDate("yyyy-MM-dd'T'HH:mm:ss'Z'", $facet.key)) - #set($value = '["' + $facet.key + '" TO "' + $facet.key + $gap + '"]') + #set($value = '["' + $facet.key + '" TO "' + $facet.key + $gap + '"}') #set($facetURL = "#url_for_facet_date_filter($fieldName, $value)") #if ($facetURL != '') #if ($facet.key != "gap" && $facet.key != "start" && $facet.key != "end" && $facet.key != "before" && $facet.key != "after") -
    • $date.format('MMM yyyy', $theDate) ($facet.value)
    • +
    • $date.format('MMM yyyy', $theDate) $gap ($facet.value)
    • #end #if ($facet.key == "before" && $facet.value > 0)
    • Before ($facet.value)
    • @@ -113,20 +113,20 @@ $display
        #if($before && $before != "") - #set($value = "[* TO " + $start + "]") + #set($value = "[* TO " + $start + "}") #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
      • Less than $start ($before)
      • #end #foreach ($facet in $field) #set($rangeEnd = $math.add($facet.key, $gap)) - #set($value = "[" + $facet.key + " TO " + $rangeEnd + "]") + #set($value = "[" + $facet.key + " TO " + $rangeEnd + "}") #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)") #if ($facetURL != '') -
      • $facet.key ($facet.value)
      • +
      • $facet.key - $rangeEnd ($facet.value)
      • #end #end - #if($end && $end != "") - #set($value = "[" + $end + " TO *]") + #if($end && $end != "" && $after > 0) + #set($value = "[" + $end + " TO *}") #set($facetURL = "#url_for_facet_range_filter($fieldName, $value)")
      • More than $math.toNumber($end) ($after)
      • #end @@ -180,4 +180,8 @@ $v #end #end -#end \ No newline at end of file +#end + +#macro(capitalize $name) + ${name.substring(0,1).toUpperCase()}${name.substring(1)} +#end \ No newline at end of file diff --git a/solr/example/solr/conf/velocity/facet_dates.vm b/solr/example/solr/conf/velocity/facet_dates.vm index 5fd2151256c..5835212a1bb 100644 --- a/solr/example/solr/conf/velocity/facet_dates.vm +++ b/solr/example/solr/conf/velocity/facet_dates.vm @@ -1,5 +1,9 @@ ##TODO: Generically deal with all dates

        Date Facets

        -#set($field = $response.response.facet_counts.facet_dates.manufacturedate_dt) -#set($gap = $response.response.facet_counts.facet_dates.manufacturedate_dt.gap) -#display_facet_date($field, "Manufacture Date", "manufacturedate_dt", $gap) +#foreach ($field in $response.response.facet_counts.facet_dates) + #set($name = $field.key) + #set($display = "#capitalize($name)") + #set($f = $field.value) + #set($gap = $field.value.gap) + #display_facet_date($f, $display, $name, $gap) +#end \ No newline at end of file diff --git a/solr/example/solr/conf/velocity/facet_ranges.vm b/solr/example/solr/conf/velocity/facet_ranges.vm index a5e9b56e573..66804019590 100644 --- a/solr/example/solr/conf/velocity/facet_ranges.vm +++ b/solr/example/solr/conf/velocity/facet_ranges.vm @@ -1,10 +1,12 @@

        Range Facets

        -#set($field = $response.response.facet_counts.facet_ranges.price.counts) -#set($start = $response.response.facet_counts.facet_ranges.price.start) -#set($end = $response.response.facet_counts.facet_ranges.price.end) -#set($gap = $response.response.facet_counts.facet_ranges.price.gap) -#set($before = $response.response.facet_counts.facet_ranges.price.before) -#set($after = $response.response.facet_counts.facet_ranges.price.after) -##TODO: Make this display the "range", not just the lower value -##TODO: Have a generic way to deal with ranges -#display_facet_range($field, "Price (in $)", "price", $start, $end, $gap, $before, $after) +#foreach ($field in $response.response.facet_counts.facet_ranges) + #set($name = $field.key) + #set($display = "#capitalize($name)") + #set($f = $field.value.counts) + #set($start = $field.value.start) + #set($end = $field.value.end) + #set($gap = $field.value.gap) + #set($before = $field.value.before) + #set($after = $field.value.after) + #display_facet_range($f, $display, $name, $start, $end, $gap, $before, $after) +#end \ No newline at end of file diff --git a/solr/example/solr/conf/velocity/main.css b/solr/example/solr/conf/velocity/main.css index 076745d73bc..ed5687e392c 100644 --- a/solr/example/solr/conf/velocity/main.css +++ b/solr/example/solr/conf/velocity/main.css @@ -18,6 +18,18 @@ margin-left: 20px; } +.parsed_query_header { + font-family: Helvetica, Arial, sans-serif; + font-size: 10pt; + font-weight: bold; +} + +.parsed_query { + font-family: Courier, Courier New, monospaced; + font-size: 10pt; + font-weight: normal; +} + body { font-family: Helvetica, Arial, sans-serif; font-size: 10pt; diff --git a/solr/example/solr/conf/velocity/query.vm b/solr/example/solr/conf/velocity/query.vm index d9cc32c8311..ddbab3fcf73 100644 --- a/solr/example/solr/conf/velocity/query.vm +++ b/solr/example/solr/conf/velocity/query.vm @@ -27,14 +27,16 @@ #end #end +
        #if($request.params.get('debugQuery')) - toggle parsed query -
        $response.response.debug.parsedquery
        + toggle parsed query + #end #set($queryOpts = $request.params.get("queryOpts")) #if($queryOpts && $queryOpts != "") #end +
        diff --git a/solr/site/features.html b/solr/site/features.html index 6874449de23..f64d00e1d4a 100755 --- a/solr/site/features.html +++ b/solr/site/features.html @@ -130,6 +130,9 @@ document.write("Last Published: " + document.lastModified); +

      See the release notes for more details.

      - +

      20 August 2009 - Solr's first book is published!

      @@ -475,7 +529,7 @@ customization is required.

      Finally, this book covers various deployment considerations to include indexing strategies and performance-oriented configuration that will enable you to scale Solr to meet the needs of a high-volume site.

      - +

      18 August 2009 - Lucene at US ApacheCon

      @@ -551,7 +605,7 @@ Be sure not to miss: Search - Jason Rutherglen @ 15:00

    - +

    09 February 2009 - Lucene at ApacheCon Europe 2009 in Amsterdam

    @@ -589,23 +643,23 @@ Be sure not to miss: - +

    19 December 2008 - Solr Logo Contest Results

    Many great logos were submitted, but only one could be chosen. Congratulations Michiel, the creator of the winning logo that is proudly displayed at the top of this page.

    - +

    03 October 2008 - Solr Logo Contest

    By popular demand, Solr is holding a contest to pick a new Solr logo. Details about how to submit an entry can be found on the wiki. The deadline for submissions is November 20th, 2008 @ 11:59PM GMT.

    - +

    15 September 2008 - Solr 1.3.0 Available

    Solr 1.3.0 is available for public download. This version contains many enhancements and bug fixes, including distributed search capabilities, Lucene 2.3.x performance improvements and many others.

    See the release notes for more details. Download is available from an Apache Mirror.

    - +

    28 August 2008 - Lucene/Solr at ApacheCon New Orleans

    @@ -627,7 +681,7 @@ Be sure not to miss:

  • An entire day of Lucene sessions on November 5th
  • - +

    03 September 2007 - Lucene at ApacheCon Atlanta

    ApacheCon US logo @@ -647,7 +701,7 @@ Be sure not to miss:

  • November 16, 4:00 pm: Advanced Indexing Techniques with Apache Lucene by Michael Busch. Information on payloads and advanced indexing techniques.
  • - +

    06 June 2007: Release 1.2 available

    This is the first release since Solr graduated from the Incubator, @@ -657,40 +711,40 @@ Be sure not to miss: and more flexible plugins.

    See the release notes for more details.

    - +

    17 January 2007: Solr graduates from Incubator

    Solr has graduated from the Apache Incubator, and is now a sub-project of Lucene.

    - +

    22 December 2006: Release 1.1.0 available

    This is the first release since Solr joined the Incubator, and brings many new features and performance optimizations including highlighting, faceted search, and JSON/Python/Ruby response formats.

    - +

    15 August 2006: Solr at ApacheCon US

    Chris Hostetter will be presenting "Faceted Searching With Apache Solr" at ApacheCon US 2006, on October 13th at 4:30pm. See the ApacheCon website for more details.

    - +

    21 April 2006: Solr at ApacheCon

    Yonik Seeley will be presenting "Apache Solr, a Full-Text Search Server based on Lucene" at ApacheCon Europe 2006, on June 29th at 5:30pm. See the ApacheCon website for more details.

    - +

    21 February 2006: nightly builds

    Solr now has nightly builds. This automatically creates a downloadable version of Solr every night. All unit tests must pass, or a message is sent to the developers mailing list and no new version is created. This also updates the javadoc.

    - +

    17 January 2006: Solr Joins Apache Incubator

    Solr, a search server based on Lucene, has been accepted into the Apache Incubator. Solr was originally developed by CNET Networks, and is widely used within CNET diff --git a/solr/site/index.pdf b/solr/site/index.pdf index c4d9c19e481..dd824332b60 100755 --- a/solr/site/index.pdf +++ b/solr/site/index.pdf @@ -5,10 +5,10 @@ /Producer (FOP 0.20.5) >> endobj 5 0 obj -<< /Length 1398 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Length 1441 /Filter [ /ASCII85Decode /FlateDecode ] >> stream -Gaua?_/e9g&;KY"$M7)XjRp5brdI3q:4!th&C5rI`+6eo?B,`po'J*cXjUdAa@N;m.6l2qs64dP5M2V%*!7nn7!9(YVVGsl\%=JmRBqc[f5NS8K+q"2,#M,NspYsHXD6K]9G=<;p!ls'2EG#5O)-DY-+Cdq.T?9hZkXj"&d_*`/:;42!/fUo1`l7Z@?BUM$0#4'1QKMSVbEle!r=Ld6p'7!tc)](1i&L0k]6fHb.(5uH$LIKH%P"`Y![b-h$/U`=OdUF*A9tW&2VT?-NSWUc,7CP)U(@]V%H:hq$hVKq%CJaLr4-?,l6b.$O=k0!@DG-)OMe_9p2HILMd"'daIR?\)m]<:lkd'14Q`(6R9:_Z7p<"8ZSkagG=-Zc6Vsp23F^nY@Ze+?IgIE!$J_ZpC^-2gYNhSR)$M@]Q[9ih+9r,[[oNS>\_n=FsNeJ,]#[UFLA`gX\"$LE4=MqX5[N!oE^3;E)0Ol[A8s1OP!hR889a.^(kn*?1j^7*P=rr.k7pMFYT5e:e*[FK`W7DO*.(n;80gtIb;'1W:T?7@SaH'lXs;pN7"a45*2mOp_s0M&$%4:L3&BuERMSb`isZj;ac'ldnR4QVY9lG]kPEb#lQO1pS0]@1rSU*DX0;96R^467G0Jtp9XX;`QHHMqO4O7$%dcIi//SGLXA")16b,rh:d*4CXooCBLMi0.ANjYL&:3lbW3QMEh[FofU_;!('gYRg\^P&^F<~> +Gaua@6$&eZ'Z]\A'^D[C+^.G[!PNT\l))p'GM@#jfgFMHN+JJ-#$4'5o'KK[)a,Q,i.NrJ&7e!s]H9q!rA!bRPBSr,@Ql,P[[[p+0:i`^RSZDr+%;=j9_\a;,"/aHqJ5prV/a4H([."cU%r_4k3i.":\=nf#L./2^]C1AJIb61!QW\'c"Jmn:[>9qrG%cSh0GN'V96rT*KMF;%`b>tf`=+PIaF$D@5NF.:73(T9Og23+S"#sfk*2>.d+!*KSM=tJmIPQM^iof:GA^ZY@nUX?uUO3ooj_?=+2$!De'e9R^6`"iKnu"sgc8S>'Ep[J2C!M4[E`K(P(IZ/67Z[/LQ,J;2GknG+8It^s]KeU'j\pW32KP^'Xs=od9s7nsAqkgo:^!6`rD]k,BW:KM.Nk;`.Tb0#.a!<9^'D:A1=<>`4O=%MXTScEX*L-JCUNs*jihQ.7/joD##HK`kd'EM"QF.j$qcT4(\BV8hE>23f6688;b*.4BWVX1-0'[[d-%'g%#ftPhl+)gUjdP3)o)PJdoN"X2Z]%BS"`l.+ZGrGD7i[=[53>pLk&6UrIdEu"]tPaMP'0/G]JcE?MOrLmBs@]YQd=mU.WT@>Z8>D[pVTg(usJD+@lB<5WnRA!l)]c4sH?@pNF_kV_uB%(dII7fkrqn?pY5*FJ-G@go5/d]TP!n&OUQQQV#mnh2OkDmSa5rHh`5d(kf[BG?a)G$L#::-29H^7V:KtK'CZ$oCr@O;O]7%4*+i\Mo]hPTP;crKpa:gVn]Un+BpJ&"'e:8M^@RP1RQ:?q<4\,\r25.BcXfZCh]7e[NpGj,g=gHFLBlWeg*Kc\ChkL,f[J!8aZ5Y2%n8e;4*bHd[(O`>rio(lA=IZW,_/h9Y1b8`;r%c*(MSE,a'ISFd[jMk>dZt?L@JL1QMgf&.NB53De+@_chZm*Vrb-FCj7i8QIr+N`XTS?"H.GNXrJ#,'a]XR>O=Gr3tO9IfQY9,1U3"d3"`lmhD>j'iHD/["V\U!N12riDqp(!m4c.:fQL#]Ce?*Prq8e!tpafB5$b9YCfka&*K;9rNu[ORNU endstream endobj 6 0 obj @@ -43,6 +43,7 @@ endobj 44 0 R 46 0 R 48 0 R +50 0 R ] endobj 8 0 obj @@ -78,7 +79,7 @@ endobj 14 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 505.066 290.816 493.066 ] +/Rect [ 108.0 505.066 276.14 493.066 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 15 0 R @@ -88,7 +89,7 @@ endobj 16 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 486.866 483.44 474.866 ] +/Rect [ 108.0 486.866 290.816 474.866 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 17 0 R @@ -98,7 +99,7 @@ endobj 18 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 468.666 310.472 456.666 ] +/Rect [ 108.0 468.666 483.44 456.666 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 19 0 R @@ -108,7 +109,7 @@ endobj 20 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 450.466 350.996 438.466 ] +/Rect [ 108.0 450.466 310.472 438.466 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 21 0 R @@ -118,7 +119,7 @@ endobj 22 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 432.266 335.132 420.266 ] +/Rect [ 108.0 432.266 350.996 420.266 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 23 0 R @@ -128,7 +129,7 @@ endobj 24 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 414.066 461.432 402.066 ] +/Rect [ 108.0 414.066 335.132 402.066 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 25 0 R @@ -138,7 +139,7 @@ endobj 26 0 obj << 
/Type /Annot /Subtype /Link -/Rect [ 108.0 395.866 351.152 383.866 ] +/Rect [ 108.0 395.866 461.432 383.866 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 27 0 R @@ -148,7 +149,7 @@ endobj 28 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 377.666 302.156 365.666 ] +/Rect [ 108.0 377.666 351.152 365.666 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 29 0 R @@ -158,7 +159,7 @@ endobj 30 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 359.466 323.48 347.466 ] +/Rect [ 108.0 359.466 302.156 347.466 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 31 0 R @@ -168,7 +169,7 @@ endobj 32 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 341.266 411.112 329.266 ] +/Rect [ 108.0 341.266 328.48 329.266 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 33 0 R @@ -178,7 +179,7 @@ endobj 34 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 323.066 376.78 311.066 ] +/Rect [ 108.0 323.066 411.112 311.066 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 35 0 R @@ -188,7 +189,7 @@ endobj 36 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 304.866 300.472 292.866 ] +/Rect [ 108.0 304.866 376.78 292.866 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 37 0 R @@ -198,7 +199,7 @@ endobj 38 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 286.666 358.456 274.666 ] +/Rect [ 108.0 286.666 300.472 274.666 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 39 0 R @@ -208,7 +209,7 @@ endobj 40 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 268.466 336.784 256.466 ] +/Rect [ 108.0 268.466 358.456 256.466 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 41 0 R @@ -218,7 +219,7 @@ endobj 42 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 250.266 321.16 238.266 ] +/Rect [ 108.0 250.266 336.784 238.266 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 43 0 R @@ -228,7 +229,7 @@ endobj 44 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 232.066 293.488 220.066 ] +/Rect [ 108.0 232.066 321.16 220.066 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 45 0 R @@ -238,7 +239,7 @@ endobj 46 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 213.866 286.504 201.866 ] +/Rect [ 108.0 213.866 293.488 201.866 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 47 0 R @@ -248,7 +249,7 @@ endobj 48 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 195.666 350.464 183.666 ] +/Rect [ 108.0 195.666 286.504 183.666 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A 49 0 R @@ -256,25 +257,33 @@ endobj >> endobj 50 0 obj -<< /Length 2773 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 177.466 350.464 165.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 51 0 R +/H /I +>> +endobj +52 0 obj +<< /Length 3042 /Filter [ /ASCII85Decode /FlateDecode ] >> stream 
-Gatm=>EdRF&q8_F@@ds.Z,2[cgXE,e=9=b@4mZ-fWXa=$fU(/p?TuJgWdtb2_KsHaRpPO5GqK\n%3T#39B""s5g/&c;!XA5r+EB6@AU,b*L%ZuFb2e(TAtn,sjUlJ\MrT\B5Fb-9f<(lCA_i0H?J1s?7-7UPZM7]P20-\5)HkuHIP6e&G#Hga.FA0]-WuWOAie+Uf4=)CP@_52r3MtFMG/:l2.#V[)>pVQ\1b+0'3WN.VP,+/`9fEecq^3REE(&e!>8IAI$8k+C9\N+)[\^701E"#*1\Spp/"pdjod6<=h_M/=SH)KG;G;BrbOgGj,V>ik=TM4^K/d3$8I=Ee13l&mCik/*qXGHNj'KYr;$eDjfFcEG^69fr2O(-'bAWn>g!rn44X5T)[$=#bqEuV=+7u?:iX87@c''@S8g?f(JHcY]PaD7Ji9XJOC[oXD#3[u_1dEj6AkDi>JSQJC;!BPJn\<41<(HQ3lT`^DUXp2m3S<4#=BN#la1!#>ODkrF(YGPu3aa%8u[9sRk!MJYTKh"KCqs7Oo(hk,/#&LM!Sac]Wbe;)>6,5Sn"?;$q%TWlEU5g%PW*Lr-bjBDb,DhH+j"XKe;NrT5ETa#Lpa&@c6eqaZ.IJdml@rg%DG-X"7ZG>jJ]-C:OK%N^LfU#94Q^-f2nP>Fp38EqQ&0nGLqT<)%Bd._NW]1\Kb]4bP`20e2s%.=K/CQXZ>6Uc[AXm9NT1f-$jol:iKDo3-*h$3>=1dHqWWY=@?_A%&K8pj=Ytr%=kAi]2&i#b,W2;hjJMbD8j1o,)beN;.;SWH.>M"0"A:SI,(U7AOcDn/M=a[+#rYuh]if>SJ'G@?-s+PU)?mRX![[$gR/.=+HO)u^;B/J_1i('jL7H0o#@`.C'=rmXK43e$S/>p5!qks#>$X(TBQTsb\Ln/TkCEX)8K>$9!Y\6:(FN?AEdBMok6G<-OP"%FqbYTG0QUtN+*_3b..-17?QdO<#.AdZKrj][pqu1Qa5WV%Kh0J`?)pDV7q0P60+;^A>C=s"GZ&54102ac(]4@\g#oC6/pBg?'Oa*q$PA4b4+.BpZXNoqBW>00Bh\JTGiTuN$01KZ8D27Ac&%!PT?]1&M?N'N/-O6>irUC&^3hbg>[LN`8mBd.[G7S$8L0"LuPbN_1Hu?/t.`QHVN+.>.TgclL_o=u:L>e;+#g@nlpq7N!'SQ2a]8:]_T:)'dUWZ5We!D)pOaWB"TD*k9Qe3I6"Hn(^1ta,(]?[QB_oW@b+IHLTc32IuiS`iDgUgL%"lt9`rsMTWd5IOV^bWrZY0.XOcgulf_NN^MV+;_#jn*d[S)n9B+h"*^IB#.g",^QI/]+TKWUe9irDKFq#cp%RhW1RuWR*.dQq=Qe3./0o@?BF(NW((Q24H)'"U'jsl"S.e,N5Hjr\ZW[F;RRc^b-TF*hp?JR=`+rjgB"'BeUcOf)mZ9Hu,b7%S9Q6J(=-C.OSmVq4(f0\h&P>WdjK.n3tVH,lT:Im`TaAO+3%N2;;5_=T2$n]-\^#ads7Wk9F9(J+,X?P(+%\F/"/S;a(tlaleLAL>2ButbP't&MQ4YQ"A-"2~> +Gatm>gQL@#&UiQ?TUm?-Bt4?PhY`OoA4_p+OuM)hPcAO@0I167(,'0ZgsPuah;G]WRn&=Y-#8>@:3QfrI2WR.]"`Q4J+`,m@u!qF\$S('fi^4q&Y1g1Je$u9g"oTbYl=3?2`gKec"%CgV>"Z_YsJVYmsic!<3Y3?-6E>mT]FRoq1EcIUQO2#3P`J,Ho+jpZ;2A?'2/%^9ITPh1'+G-Oo:O:)B8MU$qa0:u^6'To;\8PD<4Vqpu?8+Vt`-^5`""l8@,AEY+H#^mRdkL2Vs#8TrOU\?-aY_m.Gpu]V;W5O1,6F'O_(BMY&;Nmc".>AAY+]rMh_&$m<=:6ECse)h.@=uReYKjD6huZ,b^n5:uoTOErNYRf!=t5ajYBEqJH^i:MJ;;-ro+OSS$9NYpRWsWJ[M15H>+8O(1W&dEs*<1t^X'cB4$_0C]@iKou7tL:qbNao6io\`i/FON'lor;H,-SN#0ka]/a'N5lBMf]I,(XD(N*.3YFC8Yq"a4jU@3BFN0!NXP>tZ"ssh3WfT7ke>,IhWp/(Dnnl,=$s>I`RM./Xe6iFI:1I=$C:m=jO*G$@_b6dp"Zc"F#!F0BN(GEA]qK`$g*.QiOD7%"pl^^G=e+pMPjU.91-VblKNtP0[&t75`KCT/8><"k4Q5TbedtW8Ib.J\[d-6.3Fo&f,opm;d(9%<"?VN$l3VN71(+k,@s>@n`5hq1#B>UC9o+J^J1fL/>pFi[=l5",YH(.+nY4+4sJ*?4OIYt$?bIM>$a.ABQbSh;J-!lHiTh2*nsBM3aZ8TBGl2ijW5:D>b;0`j:,N^k-s;Ajs`+Gg3"++VMNXBOO?Z9E.iHb%KaJ=U;$[RBCH5?22:FZXMddP8uoM##N3JFY9_mM][=%@4BXu("^!3k)gl:RemN&!Im;tMM7t<@>m"7aQ!=S,R9Godga,XNDWu:@S48&;/$0R#YrsX](dFdM4g#U%;d[Pns*'Tfs!U:4OIU0.bBE/.ja@0$>Y#cDR&ote#6#?[MF33_NAI`7,W9Z%oHcrdEJ,Ad/nf?uEX1!6nc1@[Tj0d'm&sMlhhas0X=3n@(LKW,dC]Xr2"WFp0p#n)M;9dmk@'bjUN1&IX'J7qoQ90lk-P27#&Utg1d'mA>p"eQZ.BjR&D2XiM6rOBI&ACQe@4Bd9n`*bM)_gg3B1l;b8m83Hl*qo.-rancf((lV)(f7OGpo+:fW7qXD?gK!X\=d]e8$CLG`l9WqV/0ZL_"\U)I6Hj&DqD2%qo/`OMJ+!aT#:dB^&iAnY[9f6,*Q*<,/9q>rg8K($9X$SO*IA^1jbpN:fTR''ZHAITX?+)8I@dGP!g>c0pqI"J>mTCjLr@rCWl#o/Kr,Q0uH2/Lm4fn@Y!R9KDZ.bYJPKE%G[9M"l`Ap)j,L&8nl5C[3i@n^s5oT[6gn9Yh(WN*8K9Iu2M*"7&E/.cUaJ,]n:P>o#O7P;O;i.-dtD/jmb',a?Db',q-?j^e]&P#U!2E6r5`e.<5]^Xoo1qiQdXrAkL+^p0n75MY%EdQB\055tVj\9h-U2r*b!EA#<0:9>)Mf?*-;"ekBLFiHoUi8>22C/WNqiLOA4abPuV*eHC'.:#:^1;D[*74r]IdG*t3qVu+OR%Y('eqcY3q9]W@*5@f#3'``@>@h6k4hOR,k$'qE$V.5S`+^2H;^4+)SE;bfhmfh?ds(InZ/_@gQ0(#mD&+II_g2HlEDj+eO4\,-rcF*`FAYeL:TgNp1~> endstream endobj -51 0 obj +53 0 obj << /Type /Page /Parent 1 0 R /MediaBox [ 0 0 612 792 ] /Resources 3 0 R -/Contents 50 0 R -/Annots 52 0 R +/Contents 52 0 R +/Annots 54 0 R >> endobj -52 0 obj +54 0 obj [ -53 0 R -54 0 R 55 0 R 56 0 R 57 0 R @@ 
-284,9 +293,13 @@ endobj 61 0 R 62 0 R 63 0 R +64 0 R +65 0 R +66 0 R +67 0 R ] endobj -53 0 obj +55 0 obj << /Type /Annot /Subtype /Link /Rect [ 175.644 529.266 212.304 517.266 ] @@ -297,7 +310,7 @@ endobj /H /I >> endobj -54 0 obj +56 0 obj << /Type /Annot /Subtype /Link /Rect [ 282.636 529.266 342.276 517.266 ] @@ -308,7 +321,7 @@ endobj /H /I >> endobj -55 0 obj +57 0 obj << /Type /Annot /Subtype /Link /Rect [ 174.984 455.266 225.972 443.266 ] @@ -319,7 +332,7 @@ endobj /H /I >> endobj -56 0 obj +58 0 obj << /Type /Annot /Subtype /Link /Rect [ 321.636 434.066 365.976 422.066 ] @@ -330,7 +343,7 @@ endobj /H /I >> endobj -57 0 obj +59 0 obj << /Type /Annot /Subtype /Link /Rect [ 183.996 385.732 204.648 373.732 ] @@ -341,7 +354,7 @@ endobj /H /I >> endobj -58 0 obj +60 0 obj << /Type /Annot /Subtype /Link /Rect [ 177.66 372.532 212.328 360.532 ] @@ -352,7 +365,7 @@ endobj /H /I >> endobj -59 0 obj +61 0 obj << /Type /Annot /Subtype /Link /Rect [ 153.324 359.332 197.664 347.332 ] @@ -363,7 +376,7 @@ endobj /H /I >> endobj -60 0 obj +62 0 obj << /Type /Annot /Subtype /Link /Rect [ 148.668 346.132 203.34 334.132 ] @@ -374,7 +387,7 @@ endobj /H /I >> endobj -61 0 obj +63 0 obj << /Type /Annot /Subtype /Link /Rect [ 108.0 332.932 158.988 320.932 ] @@ -385,50 +398,68 @@ endobj /H /I >> endobj -62 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 378.276 248.545 425.604 236.545 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) -/S /URI >> -/H /I ->> -endobj -63 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 127.992 200.945 189.636 188.945 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.4.1/CHANGES.txt) -/S /URI >> -/H /I ->> -endobj 64 0 obj -<< /Length 3853 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream 
-Gatm?>BAT.'n4cZ;Uf(2"($K[OPj$.:<".`iq^k)n6BoBTW@sJTC+@]^[O..q"Ob3E?06:L*?IZT]5--lGjBW_bJ*.%i9`-\2l-*r5^&FocJ,9`ZsuBqqZt\8\+>_\*51fEC)V:g`JKTqVcM?W`K2IiV[Z>b8mWcR5$RbQg3b+g5;NTRoc)/Dsi,>8bKlg:[^4YKE-p`J-+%'5f%G-6Jk?93DaNs-`5;RBlhnt*/(Bk7T*2.LumAiRN7biXfb'*%2ZEI!C@gBGDS\#BLe.p7nLYlf/@-8NN7meFO=Z*;[@,)"Op"*Eh9EH7=<'5c$3qICg[WfnZoSP6u51XA0kY5M?jmIEsK/+[M:3:f28u'J&lD9e`c19_Wh#2EAS",7,32-q>U_f@0QF3I[H*r=cXh2jAl>T.I%85SsP&6%e#WD%Ml1'LcQAl*JVf9OOPD>#mM7LeNU97cJXr>:GH^nstc@!=Oc/G?N;r_@Q^'=SQ((Ju]SS>]X+S1gu(^Ip(/5_D`SCW*Zs9;A<=94,HWi+)q^"cisF5q!V]sHA]K?\<@um_LV^$#6eDP8XqH>!98W?uLC88qdWi)K<#,65t:G#%_5(hS!+!5L*3,c+p33Vn$Q"#%]auue??52Qh57N=+X:>-fD)0%9(G]n[8%pSI;lG6-7ML#VPtela:$D>jueQ^T_%B5p"5s.q&aEbKt'(H,YWkkcL?"q5GY?O*qXG^3)+dn1\N%.?AocZ<;`[KDVKpjY7J,nEK@,cgELL-.QcAHYOR)!VU[S#;%WS/j)X@P6ZH-bYK[5@")3VKs/Xjbb39McL7uOeU%&U4&6FFMr`,HJPsj`r/Uq6[XtA/E?e?#-XRd'@:Q9b_>?8ssPe2[@%Le-L[[cjT5E"].j6!`"H^E>aHWm_hca@@QrH]pi'C2s2fN1'GI[;sljZ?]`]\Uk^L`hP=o//'\Pe>q$<--A\#]oua5J*fm9J_ok5$%p6^06[s&-l8)epYndW+C]bm$X430)#l_ijBl+;-K?";&V%Dai_eU/WYDZ`j$q2Og4G"Vc.7lSBIH2`_MO]tVrVNdP>3"7:-HpXZ4q:aM>RbK?^oM$X3N]^p@eKHr$9&_6bG>La9,XJno!=1tp4N"#f<8;Lr4T-l09&`HX[@]_!c)R.RV(H"nfW"))jTQg.:>Xq@RocmtkkL1p8ojc$HDnCHTS==eWoUg4Bq*s*BmHS64g*&:p$12h;q\4XN&9RDe)hcq2NM;4ZHgH&KQ:A->h"RT\Fj9TJ%>_6(V4D6L,n\n]8l\A]He$Y7p%S>1EG)^mM2maTB4F),2duS1nY2bg-F"%H4;`q+/?0CQA?Z$V12E*+5SgrJAR/5jd"Lm3MN[`s>pYBl?86'X`L4N7.Nlp7-4ko^fQom>>%=g0'X>>[Fd#$.FcR1`Z0Mdf,k/GVcV*'8Jt!n.-LpX^YcfFu*MUj2d[!H\I&P@EY:cJu@X4it&>8T_H4-[)<_\F2u_+XX:a&rej1?j$.AA];+2'm1UQC4S?BZ_OgYkI?GH;hN+82dXAP7kI=_LdVI)5IOGLm_IU\B&nYDRd;-a)%(22(8<]tLne/Zp!cY3/tIQV'9`V3HPKq!JIidRIWodh\mg6aG*l#be&JBL8M:OS+H`u!++$,m42ZM+7g1"-Z4)/^k^s,=3]'q:5)A8p,T\QKjQ_c(V/+p*1pJPH.M7NJ)0Lg>&P7-&P!:7AB_OODU?kYpE+n`o#``@nHtIr]B\!Pcj^KnT*aGL'iH`!7EmEF&_jW%G5<6aWg3&@d%*%B$J]]Q&-QjC2@nnda'+.<+$:hF%tC!q..5*Xt6XP5n>Z?EV,5cB>FDMHb?i;=T(MH@)nC=%\j=SY&5[O>ocpD.(u-muW:3-3cR/CIl\nrc3$LjBVq1$jM3O]<2C,mn*8ll<-MW33#d/M/HDlPCEM0pFPEBSTE\kU;;oQb8a"V:'L43`PaW*f2l5#]*WGHf*QQ5?9OGPW4LL!#B>fh'QM?*7JH"@ffWM&6-]_D>0,1<`Ur"O0m(\/GHq?3ZZ/X_.bl3hgT4?EeI$"!fW?8"".2LjENuW"L)(CPGF97qnB7Jjs_,S"bh2h_A"1P+VK8B6IM-nLi-:8c'%J=!pmtpe9F8%g83h[u_Z&GPqtcE`^uM!oRua3mf5ObGF!XD8//%1#;;rl^h..n)>0($n0!T1RqoeL$Ff3frQ#^UKD=?Q8@T?*JmUm7;'!!Sa"FNsMJW7u$g6)_3[mm(>9/KGK8PFE[I>FclW1DJfU!@>J3l6`:8r]?qM0=li65`6k7T2Vm\VIDjo^LK/>'&d9!4-47p7dZ%puOjCq'&rok[.r59~> -endstream +<< /Type /Annot +/Subtype /Link +/Rect [ 356.604 248.545 434.256 236.545 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr) +/S /URI >> +/H /I +>> endobj 65 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 175.745 210.624 163.745 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SimpleFacetParameters#Facet_by_Range) +/S /URI >> +/H /I +>> +endobj +66 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 133.656 162.545 198.636 150.545 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SpatialSearch) +/S /URI >> +/H /I +>> +endobj +67 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 292.62 149.345 451.296 137.345 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://localhost:8983/solr/browse) +/S /URI >> +/H /I +>> +endobj +68 0 obj +<< /Length 4146 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream 
+Gatm?=``B.&Ur?8R(T5h`1sRV@3e$.,3$Z0E6R:u1i%?8'Ju!Be4L@%Me4X,V[%)9g4,N^h)H#k^'TS*_bN`%&D0l3PS`jF/q0mYc7k:V&l1(9E[2dWir00(Zl)ARHkH\iah3@-XI3ruoJAf`$rFU@#MITh+eN+Ye@D57PiD_!;O7;NmP"lAf^2NUjHHP!E-j4#U>Vp]l"7s]o5XI,KfclO!o1.C#ZF[$@LUP%J:lD^7Y+EVp-sU**Gn:4?JJ\&eOG$pQL$Ho$5FbO]n%5ofZT55?:a4F:bA(mM`.25Pnra.Z`E#PS[bIS0I[tOA0VHAZ[^2CTENqbiBoGX&`(P@QU?S1EOOAI]C\8[`Qk'PnBZef7/4@n8Q`\_L*h0E0.q87=l-h9_S2LLZ*0UD;G[h0kS+g1S*q38j/\T-1LP)Pg_r%7"$"-!K'+_Bs#T$tp:`m/@.AnR1%J"EpBc#q`JH*A8@nI%B[dQ%KX[M"-9g-17GfJ_,g#YOa+GP7W`6"C]\9t&XXE*)."Ol,7`OMSktO-t)]GA_RNZ(Tk.ept?h@Nd-M`(U#ZdOACSUmG6ZjQD<]-b5%cP)h8k-0_mIX3tWgG)'h5FaD)!k[Au>CZ)+m>=dok_(_@AgBgu1n'/-%r9&hO`2uZN;uV8I!Qt!uiF>+^j_K>&b]6ZD/\Bqt]8*Dk.*+iQ4>F$EA_(AssHo3)h>!Q?j7%H>"k(e`A1+Uq=!"[>Xh2<,%\I08CKsT.A\4M@bkG[F7;6h5%O8US#k,G09g=%1>*u&+*etIFEKqXaAL-s0P>gk;ks[[cof#aRZE`p2,-QR&,hNk#D$[i2A1H8kojK)br';'K?a3)Z$0m(eUUTE)EYF&F'7H&(sA)JI=c+*@2a:@:erWpcR5#AZHrZSoJ]ZB,abuA8B6*PU:.R=#IM2BjmeindP"l%5^-,HMWSI(IVX:9N:s[>gAKDF:+E[5Zl:iLc!IF\>V\X2%;4mX4e+N@n?(%GkJe:Z=)BV3:VQ"E$BKO'dOp*rd#8+s^X:561:'*2ZWXB5q.ILAn-u`*HP+,/\3%ksl:)f7.QDY6RRN9XDcg=)kD-ab,;=)[9%/@[jhgS=`U5%`CJkaV)d*ds)Kb.cB4^Ci$@/+muE%1cl3^g9,E:9L.l/&O2au\50e'RJG#=%>X/b3Wg&Jn(csmCF"uZO210*qAdL:@,9'Wh$#SCSU!k53CN!KJD\i`[U_bST*DN`ElNF;*uG-*0Ql'E67.IV)df]#fi9/Nj^R7[.$7]N7BJI5Vun^Z!Ak)<#`@ssM7>/r4smSb'eJ[V+T`=/#@9a\A\+'RiiE!&T!3bt[Dg>CCl$_sJ8hoVYS'BXqflVC-Rhma/II1*7pNA[l50(;jG@XXs4:*'n$?WWOXT\^%jtGel^XYIJ@5H)/3Y9Z9?GPa5+^1i4SLi)BF^mMP!D5d%e?dX2btmJJj5Q;YjsWe?WL\Z'DNnB/:EM]i2T0Wf?=sG9j"BN/M"+MQiTMOAECHR;/?Eq/2NjWc9Pj_$RPFV;el`]<$:A^1>>A5Z%]QK("4>-SMi<@WJiDYZN6Eo&47l+.h"^8,.?5gXm=ZCGCi8Us'QNB6\("j/(KEaCI71W=5KOp\SP8$ga9afDC.$_rE9$mnfMH9[6Bo>fhV0fQ$o3prgG_VXj)ltGmj]JB,3G#a>b7HV7qC1rh-fs6&GJsd0(p[[I''4QjtUF9SuJ@2@Sf6'.g>D?+_XN_6S`./]Md4b)kP)(c9;4VhWOL?[!\EJ!h-a@&/7gsIhb47XS[cAh+=QZaXWumF5i-f:fgAgNn"+?*.ptK:^*449_UB+\imp%KV`SQfX+Cg@tdODgG,Q=t&+'#p&Ehg92ia%Cf]S2TJNP*Hm)Sa;h6^q,g/4DkgZaR8k>=*C31iLSEG9%Ct2H?0oSuB<80(Y86NHa<2\-p7IDqYWG,\f8f0:tY?Dp]'CYP@&?VQI0+C_(AjPrk,S0$7HEc:<8\nZFGphVGKY[`T=Dm9]6e<`e[6ane3QRM#5a2;+UlQ_hg1]X`KohWAPk-bAHS!=P4Q(RCEs':L5Mj`3l+bXub!hr']\IsY,U4MFWE"5,G_'@X2r_A!k$@DIAV&=iSIi>V^6G6NorIZtiJD;DI.[?Jlm-^(hl#8_QhZ;I[\VRlB=`=9Dd^-;-aR5.`Qi:mHuUM[%JnR+pl[6K+OL4#k)aHFA[[4:VE8Cc2m-'.!13!GZ8rqVRh.mClWdu%6oE-MZrelNF`=l/i\.@=2a_s+,)TGW*^qrUE;%a-m]!7N?UiTXOIO,cD;NHH7o;r\p6(Y2kZC,i0Y-g8W!,\mJTDKp?[QI;2a^:m*M`]rrA@R%)r~> +endstream +endobj +69 0 obj << /Type /Page /Parent 1 0 R /MediaBox [ 0 0 612 792 ] /Resources 3 0 R -/Contents 64 0 R -/Annots 66 0 R +/Contents 68 0 R +/Annots 70 0 R >> endobj -66 0 obj +70 0 obj [ -67 0 R -68 0 R -69 0 R -70 0 R 71 0 R 72 0 R 73 0 R @@ -436,147 +467,9 @@ endobj 75 0 R 76 0 R 77 0 R -] -endobj -67 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 227.292 660.8 372.912 648.8 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/) -/S /URI >> -/H /I ->> -endobj -68 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 196.992 634.4 316.656 622.4 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://guest.cvent.com/i.aspx?4W%2cM3%2c4d21e59c-fd92-4a83-bc1f-175dc506905f) -/S /URI >> -/H /I ->> -endobj -69 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 617.2 286.644 605.2 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/training.html) -/S /URI >> -/H /I ->> -endobj -70 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 571.6 211.32 559.6 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/sessions-general.html) -/S /URI >> -/H /I ->> -endobj -71 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 473.2 420.312 
461.2 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/agenda.html) -/S /URI >> -/H /I ->> -endobj -72 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 460.0 234.984 448.0 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/meetup.html) -/S /URI >> -/H /I ->> -endobj -73 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 446.8 280.632 434.8 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://lucene-eurocon.org/beerfestival.html) -/S /URI >> -/H /I ->> -endobj -74 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 369.276 393.547 416.604 381.547 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) -/S /URI >> -/H /I ->> -endobj -75 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 127.992 207.947 189.636 195.947 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.4.0/CHANGES.txt) -/S /URI >> -/H /I ->> -endobj -76 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 447.636 156.694 493.632 144.694 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.packtpub.com/solr-1-4-enterprise-search-server?utm_source=http%3A%2F%2Flucene.apache.org%2Fsolr%2F&utm_medium=spons&utm_content=pod&utm_campaign=mdb_000275) -/S /URI >> -/H /I ->> -endobj -77 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 90.0 143.494 226.632 131.494 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.packtpub.com/solr-1-4-enterprise-search-server?utm_source=http%3A%2F%2Flucene.apache.org%2Fsolr%2F&utm_medium=spons&utm_content=pod&utm_campaign=mdb_000275) -/S /URI >> -/H /I ->> -endobj -78 0 obj -<< /Length 3743 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -Gatm?>BAT.'n4c<@.V_j1D\74`DYAebC&7mMZ+Grc@KVdVf"XBRjH;J&#qi&t16.]+T'q%d3*1'IFaNZWmDU.`Y-imH5?t"Em9!4"_>f-aI(g$&`"#,kimbQOq"r%\=TL17li93JqDtug&&,U1?9"L?"95o40,](Zj94%NC&RpugO3CX.fdLHhqTr00IufNTZ\qIntMk3(IraF+"0XG>=t?M,N0SB+R)$Tg_qM(ZRrDcjn8N5>O0&Tm8'/Wr6u2\'@`72:^jg7<^en\.H^GDh]SAXQ,m?5hW1KS=Z&um",e__Tqk[uHJhW(U\J/"_RL%Dc'4d7`#nS_5J()PZFs\S@eU5`i$Gg3Pug&S'i.4[A$GHWQE7)*:,#/oTVpN_I7(#o]VtSk&T&`+^aeoTTfZP2^G.C4/bo/C(mQR)YBMr*R+M>`%mR+%bSUS"b*ZW@T0&bmJLmR=NHXSoAt-Tu=3n?P%F8V_k?A5J/rr+BBMYm'$G576Xg4\WQlH9ebF!L,078bHn/Se4UK*1XIB4@XK_0Wq"hA]qY7OFV@#-jR*%)&$Ge=pMS6!RL\K/0'Va>Y*pc14Au/;E(tc6OOI30s85]7/(Hl-2EH>.)(<6D)_&3)g[gb6I)McS/6/p\Y3/'\lk/slI18lWWY:._0)AO6u6PlFWTUHj7@k'c$gK1Ia]3N.tX;S%S)!%]XWh!n\ib6;Bk9ogc?E'8M):&g&](m@b&#;HL5.>`+FSY1A:Ss-KF9H@u2D"?/csGlM.1-L;/Be57K0HtLNMoenftq2AQkc9RmTO*f>M5oRh!nA`qP4r8)1V[G;cIZ`(au:LY9KC[:[h6f*qNorC$]Xj=LIr7&GiMH\`rHa(r%g5et40dJe"NkQJa/L'`nMjeZ@[,GunU;Y&#Z5!-TkfpZ)alU.f]/Ns1KeNW=uUNOGm>Q&XVqGaHP_A4S3"\-&^WBB%d>a(.7C1'LegTOL)B\X;++P(]]4m1+5A8.6)#`JV)%)AL$T)JB=Y!X?P/O@hni&uaG@E[jN9_cWcb%D8diJF5rj_=p(r""/&%LmWXo&J3ilHH!P6I_b&Jk.c8Q4e]N.YLImg8GgZ$_i;&XhYU64FV=8>52cHea"(UXL#%@+?1F\@ha48PUMdhtM1Jg)NAqfTeqr:7M'+6_/X(pp?%'Ph!`,sEc)\/V`Vof4ip,5ZXI(Dqml3=UD"o!(h_M>_r8NA"+-HfVK8Rs_5E,I.qtd(%S@.Qa+^UhaE=Yi\8lap0uJ8Ud&GpH\uF._XZu87\`rVElYC^ghhmX''eT,T;ZH:"d$1`)UjG-@ZaF`KDD^3qDT*I^&t)`9>t*Asf;qCFF!X>/LdW+_J<9G5&4LuM(i)f]2.05qHcrE9&=PjuH&c\;?Ef%K.#QZkK=PJ=5[XjYJYHdSL[s#X_DeuT%7q-B6$3XM.EXdqf`Q8B%qi5`b)a7RpXqZ0p,fNjdgR,j-VX0r]E9kC!W5/%@%,$Hd$;$!G2Mj/oGBm2n`>>T>Pu9[f6`GE&%)]>52T#Q4"p4Acp!bdpJo,aVTnt2c&ak`XWh[Cb<2YI_jNbT15Z,hKZkBf-t5=\Sk^$"Yb3E7Q5*+:1]`+tc^9.A3f7CL`r8HJ("r5IL=HG-_@Q#^=CG;\MT@E93uqKQ99ti;+VGds,%?NR84c5Dqi'(G\tS4j&P7CTh[>Z8IXkH6FYiKe*OMXONrXZn+4E0/F#Ks[aFif2)P2-&VWDp_KgG+:ZI)k-ZlfS#1';%8+sGq_N1H=Z-;=m1HU[3^X,S:)6ApqKQBb03g(c=[PC;m^=Xf6LI\`%%Gh-hfVJ',]'I(sKTNXV<[Y!ioMTB,u5?FfHiY`(h%
ud)M!W=1fp4T18]kV>1A;nCYFk4L;o;j+ROX!-(acQoOtPhgGF-G^%pUF7F2L'h..*k\Fb$5)S8T""&Y1A)'MLo\)Zd;al;n9Id0$NNR+FpFu/ac<+@X!RgAk%'t87:'<5I@Xu?1cr>cWbS.mflcfb6Z>iU#7eCF5@#CTIJoWQ0'(]%FsXur?,ou[3`aM?/V@+W%hS1Of(]$E,=(mJW2,,5<*?p?OoQ?bl.#Q(uSI]GM6<1UuC'p)<&Ae\$E+*s$J./0.m3h_Tfi1R#I%eqpei8BQFbg1K^:1s$UaSDOC+K+L-?UYX:U'%sgqsbRO#u'i0NF;"NB]h<)T?,/a5(O&0:c'2k>/,5i&O0,G^>_^6V&Q\GiscP+[E1/HgqaanTtOUg1c.?[0dts163kPU-f3qN6DR5((HGLUhC/o"][CFhP-HQ?g4Ni?BB=NfQj3D,W9Pj7G1#$fAT/cH-6Pm@HQecC"*TnB/rU^7/A]VGYT@!X4)@:)S4:^0'eunU`+n.L,K%8TKJ+kq0)OY["5NC0(^I5qPFg\%M='DjF!9!'Le7uf7ig,2-ch(>h1`c#^$DMKA748Q(i'R9nsI@dUbjGLfB!4,*-[-h0e,-aHZn#Dg.\S-kRQG]=9E0<@K%NC#-%?mW_69m/LVs3)c)oaE<`kfPF%EFO39g]&In^DK^nL?feeD&s!RGHSmqk<%S+O][*^h-Y98CNN:_J"a275?[js$%9E0~> -endstream -endobj -79 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 612 792 ] -/Resources 3 0 R -/Contents 78 0 R -/Annots 80 0 R ->> -endobj -80 0 obj -[ +78 0 R +79 0 R +80 0 R 81 0 R 82 0 R 83 0 R @@ -585,18 +478,125 @@ endobj 86 0 R 87 0 R 88 0 R -89 0 R -90 0 R -91 0 R ] endobj +71 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 172.308 664.8 311.28 652.8 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/HighlightingParameters) +/S /URI >> +/H /I +>> +endobj +72 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 272.304 625.2 327.96 613.2 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SpellCheckComponent) +/S /URI >> +/H /I +>> +endobj +73 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 351.288 625.2 381.948 613.2 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/TermsComponent) +/S /URI >> +/H /I +>> +endobj +74 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 612.0 155.328 600.0 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/Suggester) +/S /URI >> +/H /I +>> +endobj +75 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 200.664 598.8 270.984 586.8 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/FunctionQuery#Sort_By_Function any function) +/S /URI >> +/H /I +>> +endobj +76 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 585.6 231.996 573.6 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/UpdateJSON) +/S /URI >> +/H /I +>> +endobj +77 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 572.4 211.32 560.4 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/CSVResponseWriter) +/S /URI >> +/H /I +>> +endobj +78 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 559.2 234.636 547.2 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SolrUIMA) +/S /URI >> +/H /I +>> +endobj +79 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 426.972 546.0 519.936 534.0 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://lucene.apache.org/java/docs/index.html) +/S /URI >> +/H /I +>> +endobj +80 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 127.992 522.8 189.636 510.8 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_1/solr/CHANGES.txt) +/S /URI >> +/H /I +>> +endobj 81 0 obj << /Type /Annot /Subtype /Link -/Rect [ 90.0 374.347 164.988 362.347 ] +/Rect [ 378.276 458.347 425.604 446.347 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/) +/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) /S /URI >> /H /I >> @@ -604,10 +604,10 @@ endobj 82 0 obj << /Type /Annot /Subtype /Link -/Rect [ 503.58 347.947 538.896 335.947 ] +/Rect [ 127.992 410.747 189.636 
398.747 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/schedule#lucene) +/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.4.1/CHANGES.txt) /S /URI >> /H /I >> @@ -615,10 +615,10 @@ endobj 83 0 obj << /Type /Annot /Subtype /Link -/Rect [ 90.0 334.747 113.988 322.747 ] +/Rect [ 227.292 346.294 372.912 334.294 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/schedule#lucene) +/A << /URI (http://lucene-eurocon.org/) /S /URI >> /H /I >> @@ -626,10 +626,10 @@ endobj 84 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 296.347 201.324 284.347 ] +/Rect [ 196.992 319.894 316.656 307.894 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/437) +/A << /URI (http://guest.cvent.com/i.aspx?4W%2cM3%2c4d21e59c-fd92-4a83-bc1f-175dc506905f) /S /URI >> /H /I >> @@ -637,10 +637,10 @@ endobj 85 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 283.147 150.996 271.147 ] +/Rect [ 108.0 302.694 286.644 290.694 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/375) +/A << /URI (http://lucene-eurocon.org/training.html) /S /URI >> /H /I >> @@ -648,10 +648,10 @@ endobj 86 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 242.747 290.64 230.747 ] +/Rect [ 108.0 257.094 211.32 245.094 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/428) +/A << /URI (http://lucene-eurocon.org/sessions-general.html) /S /URI >> /H /I >> @@ -659,10 +659,10 @@ endobj 87 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 229.547 267.288 217.547 ] +/Rect [ 108.0 158.694 420.312 146.694 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/461) +/A << /URI (http://lucene-eurocon.org/agenda.html) /S /URI >> /H /I >> @@ -670,7 +670,210 @@ endobj 88 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 216.347 245.652 204.347 ] +/Rect [ 108.0 145.494 234.984 133.494 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://lucene-eurocon.org/meetup.html) +/S /URI >> +/H /I +>> +endobj +89 0 obj +<< /Length 3033 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream 
+Gatm>>BAOW(4Q"]@.VfW`b%g3rg91=G3Y+>f!`2sfa!=b.+BN9JtsCZfC&_hOp<6UfeY^XA4255pWIc@nmQ\6H$/f3Q"st#pWHRrfT$]$+oLX2lLRa.H+)mhQ=SSCl5s(L/m`K1S"R6?\ofTk^_a'=Q.'&[i/_Yr(PC(XA)CH)eplP*,:%MHrRY6Od]r#mZQ?h_=0<4GD[3`>pf7CuM&h?ME0=Fc*=%`"Cd%Js"2qUD,;X+ChU;l"coH)q?^c48GU`'sH1#_d_3@@Gg;)Q=@ZY]AMI^9'<_1$W]m$2a72QL51@:+7]"iH\aN'&f(.Qe3I<08hnu7H/c(9U_cJ&-c0!3R;G0@WDK?JB,Nn7-o.58UJX\RNH4G8SlF=m!+C_ik')$cUm-!Xr+oQdWLi#oBgcNOhU2cSlR*]$_1fWP/OgSQZSsI[VrpeDFdFaXm:q(Y3+J]Xejg,^\Hm>+?148#u/gbuctia[*B7KU1hQUPksn7W0:qcfmK'j3^mMrqg;uo@+>C?3pK\Js,7-)epnAQLB!jd\"`mCSB^G<0.hQijm8:m;7h'5Oq).s+$WaSmUR(/YdG5=`n2IGpIoih4?MXe%FB+"VsQ)T$&/Q;_pP"i.uuN.<5kJGNtiO(5EmP9Dj(Q^@O+l*fhd?A\-@:[,BTbPh#:%70S=^p0OX>gk^7[+4u,[HS%YWr8nfO^kqhGWqR'Q[,qFh=RTr%,>u;9`?*'/k+VR/1,E#I1k-_"PoX62IQ/nbo*XB[!64kM9%s8]FE=^uGE6G*^Nb_M]W]QLo5lJE@]#k*Ym;#-Hj<;[gZ:>;#HZ[1H-W_a04TWu]sm@%;8M-rC0cVAqgh/@>mbNYdGY&*E/_KH<)S/rp/9.$X*;8GarXqn-jQ*A3&eUOX*uF:Yhc^d1k7RWP!80/=*Y:h.o#!'oBloP[ub#t[lSd+,64"\N*i51MWZgM-grgKs00T%=;'&mF[ASMF8#KHO1o','')ZJl+Rl]SZ3#1nI.i>^5QU]b>hZ&9VZ:j6(fQrX^Nc3@>qTl:aHo0:ib-?*JFFZL18apjW0K#)Q=_$EX'Ckj1Ejp,5`RSW*],hrTgbf*m1`q1)@\N$VX-Rt(]IAP3I"W%$(f=OIg*N3;X[`WqW_pc,3b#1TOloS59(bE=."TW!3b-a-STXUE5Zl#`bGY=ns9\N*#?>BtHH;5UL>\sP`ALNm*5lZRRk[35u86W61+ePdRS0="V-Jo/eqS_#-LCUM025FFB@cM3.+hI4[(geq3$C4rB_48XV=\%iKf!5pJ9XV\GOi10h!E`&J2\:nTBQ1IQL"g88]JE+jO(IATG:V,t%)O<=_S\?h#^GsN/=2ZRG>6'*&!HMSDd,bC8\cX%_Yep>je)BA6heSgd:Kr@]t5'PKk<9nN!qD&(-]78S\Rr!GSCUIh&K.p?o3][B247LhZTIu%+"kW._053M4d@t?a]1lnaZ?bmf0lq"A4S*ZmuQ?F^60oIn$9$>;c[ej^N:QK4msXpY-T*6-mGp-_PG<8[F.@T\%=.EA=E],"K'($U=mYm'*;;!`1P1X>[I/QL7A-Mi9=hPbgjA+j'BoeWFUsi,$AL8,YnuXZo"HP4.r'm#u8Z1e.c]c2*6_oO%&]Nk7N-[0MKcFA"/596blX`_%m-IrTWfLV=*70#-(5d%FOh`!0LuUn`e#Fo$h8B#?7ecg/[e96+.FOdBWrCmBI2cJX6FV;TGus3t[=cI'D8uP[QbeP3~> +endstream +endobj +90 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 89 0 R +/Annots 91 0 R +>> +endobj +91 0 obj +[ +92 0 R +93 0 R +94 0 R +95 0 R +96 0 R +] +endobj +92 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 664.8 280.632 652.8 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://lucene-eurocon.org/beerfestival.html) +/S /URI >> +/H /I +>> +endobj +93 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 369.276 611.547 416.604 599.547 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) +/S /URI >> +/H /I +>> +endobj +94 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 127.992 425.947 189.636 413.947 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.4.0/CHANGES.txt) +/S /URI >> +/H /I +>> +endobj +95 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 447.636 374.694 493.632 362.694 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.packtpub.com/solr-1-4-enterprise-search-server?utm_source=http%3A%2F%2Flucene.apache.org%2Fsolr%2F&utm_medium=spons&utm_content=pod&utm_campaign=mdb_000275) +/S /URI >> +/H /I +>> +endobj +96 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 90.0 361.494 226.632 349.494 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.packtpub.com/solr-1-4-enterprise-search-server?utm_source=http%3A%2F%2Flucene.apache.org%2Fsolr%2F&utm_medium=spons&utm_content=pod&utm_campaign=mdb_000275) +/S /URI >> +/H /I +>> +endobj +97 0 obj +<< /Length 3836 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream 
+Gatm?flGk+(4Fe9E*cG1.Z5GNJr?8@Z7V,9@jeO..TNf;5i#6q7r5KK7C)LW")u-]5bQe&.M]Qeo:C"/i\%_DD`1\c2cmb"L#/F$s5>F8r4n6NR/\$9nbYAaWLj@!=#9=5lBeQ@o*1$^YQ!LHGTl)HYdqLUHD?#j(OC]W6%0eAhWF&#qto-\UdahOlEB&%lCkq1%s]C\m],P<*c7rJb[=a5Ddpqj*RqH)>:9!o)ms6i>Qjr2AEccMV@e.9c/8h20\VP@3=;BX(+?"Mgr"63+1fd%@LFQ$'FH`0isI6G)/_n1^0&]JG35Un-S,cKEPAn=As2U"V5QJo+D61Mhg*7BCL"Y#3NZ5#P8<(RCC^DM)DZ=i$".'(@mn![_Ko$NVGH]=%OD*K@o(dY(Ld;]/"<@>oY9>Ft.0._N<;3J^ADo_9fQacYu!4<+p"9d(Ft/0Pl@r'="Jt=iO:KpTEa/i4`]^cRS\+5X_SIVLd[q4>6N7"4.!)D\%c+SleiphpFbYEM2K^Ip9YCqn.Y\UWm3M<,Ur_[6-J]Z&j0WT9:%"k"Y5*f@fi#`/;aOt4#VksDE*[lKS[QnB!t+KcdpDXIeB(dGhn-P3hL]VC_9N[\t+mn>?AFS4"T):EC4:PE?fGg0]PW15k&.q7s5.U2^VIr>3)8dXVr`Q!ecXG]Mo'bbf=bMbL-_M!WFEqB!6jFXAbn<:Y%Tb&&jr?[#AX(kgVSRL"fVon.-sE;#)qsB-`p1u%ZuG9F9kLtYTL3\Mb7_iI9:$\oTjBeCM'\LC\:[F9R?h)]K0Xf2@ru&DC^S.1Akt=7eo1dklt/!r@NC&5$^@f]$*u&pHcp*GQQ4855P%:i"2jGU6\!7f#KhSshK7$9YisA@Yf<QR=2N!]O2+s`-?^6R6@:,26j2n&r09-^VgqBh2=/RN3hl4K?-&YqHan6e88IWjV?5O@^..oeG:B_F/+lq"=!FB=P3`$Xig'nmrSd[5/';?7>Y<2'FU06FR##.Ns4b`V+3Gi<8U9k_*UdNM#25?T_NZRHUeKsc'K*lY2/A]Yk%i60o'-:F^s?p]5N]L)1"$74T\oZ3W(qS=@Y7i-QPS'QaYFbg1n/*F7%B2^4hIpKQ8aP-gLq"5,5o]gV,DW%+<$_+F=?*]j7BO2O79pOte$hG/9ebc?n%TJP"("es7!$W-u!BYni_boKDU5-nJ(K$:u5i,FBV)>4=bT\<#8$tp'h!BgG^aR`V,8mJ^>8?V1$g"#c)pJm,a.c/@^kdjQZpn98>\@U@WE%.?Jm`=%@&^XTAm1aA$546ooul.=Bru?L;P9eCN.'lIKnX'6,gr&4D;p6j?gN['M1m];$sc(+q)@TM"$>-:Fk"i.-1MRZX-mfCW$!<)+)2:)X4"^&HZf@'XIbW6U]$a%EOF@=#ebpDcUknIgSJ.CAk,&7!&.bk#3gE4P9j2n/4"AgZ(_Z_>ilqZe00&Cfn\guE[OeaGEY8D"A$r3:'@;<7WR6Pdk'.huDF&iZQl6$*C&oK1.>MO(,)oN+NJXj*hfB]1@jS'5u=5_Hj]&5]r)mq`@'^,_aAHol`lA4b9#JY49+8eZg?&E4^Z]OGVH;kVhV3-AUpQ+ocslbW_ib(#7\LuGCG6nU*?pL?p8\opVL[&..TW:CZHYJ"Imj.W0HcVP?#oUhe*t^ef5EU_ZjB:QAaiOE:h5eJ)p2n6h8&+XoPXIpmInonm-D$#,B_kc'3^)*jrm21(,2A=d!YWX#50A^Eg'f'J%0ohj4WT$Smb9*mK/9u:=75nfmK9e(46ge3F#W$E26;7;47EF=rrH.Oeo(~> +endstream +endobj +98 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 97 0 R +/Annots 99 0 R +>> +endobj +99 0 obj +[ +100 0 R +101 0 R +102 0 R +103 0 R +104 0 R +105 0 R +106 0 R +107 0 R +108 0 R +109 0 R +110 0 R +111 0 R +112 0 R +113 0 R +114 0 R +] +endobj +100 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 90.0 583.147 164.988 571.147 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/) +/S /URI >> +/H /I +>> +endobj +101 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 503.58 556.747 538.896 544.747 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/schedule#lucene) +/S /URI >> +/H /I +>> +endobj +102 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 90.0 543.547 113.988 531.547 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/schedule#lucene) +/S /URI >> +/H /I +>> +endobj +103 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 505.147 201.324 493.147 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/437) +/S /URI >> +/H /I +>> +endobj +104 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 491.947 150.996 479.947 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/375) +/S /URI >> +/H /I +>> +endobj +105 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 451.547 290.64 439.547 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/428) +/S /URI >> +/H /I +>> +endobj +106 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 438.347 267.288 426.347 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI 
(http://www.us.apachecon.com/c/acus2009/sessions/461) +/S /URI >> +/H /I +>> +endobj +107 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 425.147 245.652 413.147 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/331) @@ -678,10 +881,10 @@ endobj /H /I >> endobj -89 0 obj +108 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 203.147 211.992 191.147 ] +/Rect [ 108.0 411.947 211.992 399.947 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/427) @@ -689,10 +892,10 @@ endobj /H /I >> endobj -90 0 obj +109 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 189.947 288.624 177.947 ] +/Rect [ 108.0 398.747 288.624 386.747 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/430) @@ -700,10 +903,10 @@ endobj /H /I >> endobj -91 0 obj +110 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 149.547 516.6 137.547 ] +/Rect [ 108.0 358.347 516.6 346.347 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/332) @@ -711,14 +914,58 @@ endobj /H /I >> endobj -92 0 obj -<< /Length 3873 /Filter [ /ASCII85Decode /FlateDecode ] +111 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 331.947 367.944 319.947 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/333) +/S /URI >> +/H /I +>> +endobj +112 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 318.747 263.304 306.747 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/334) +/S /URI >> +/H /I +>> +endobj +113 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 305.547 437.64 293.547 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/335) +/S /URI >> +/H /I +>> +endobj +114 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 279.147 186.984 267.147 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/462) +/S /URI >> +/H /I +>> +endobj +115 0 obj +<< /Length 3482 /Filter [ /ASCII85Decode /FlateDecode ] >> stream 
-Gatm?gQL;L&q-CUd*l#nA^_ni&>YqAI#3&Flb.Ni[8hR!i(+]AAP,om.MMj\2L#6A!!.*S="5hZ_`m5pQgR>Lp$Uf$j29-F2pl4Js*qW%_B@VZ8HSq39R_H88.om-D/->$]_Ua8]1%@:"U*Z[jsRTo#lNZ?kpGbU6MN4f6t%Ps=V;ug]QlJSrq"$Ud2jo)DI/M:k_B\ta8=%4YFGGH?2=*a0majo5ApJ0H2&8u,"4C&oa51AAEgo$?UECQcok$Ohr`+[c-;mgmO"Ul\;B3)8E*TS.5t[s1;8%jgs8,iLK!Y#k#98VLrZF+4m1RPE:Flp;??MBYT^L_OiHn[D?M7d=X(DKo0[Y7?#k'u8]AAB2?5Z9Z7*e=Z9mnkM6t!?aYZ7/:$ed_+^tP*O9God<1+r0T;pnXGK;Y(,o;'!>giZq=;4U,Zm^,R7<5$clKD,]!)'?E:Luo3-3T*")-NY6R3K%]rp185i(>?YBD0r!G\q4f?8,c[oBSW,d0\0h!4/E#iH&)808Xre]9M:IkAK+AF'na!mah<0jEd%dp=Id6+*+fG^`@=&Ut`"[P&;b_,C0=ELD#.!=LIP)RkRh2gdP@TC$d1WL.L@iG&J2CN`a'n$TZF7#e560%Er*HrcFgT(Z]d\bo$s[f5kHZ3="+WpUn"po_Ni#qI6h*%+fu=0Cc41QGMmi0%Q`nhTr;,lVD7cH&[u[;nFEnru8\ipGj+mkQ^T&PL7+@JPpMn-`U$L@g?&E[cc6,eMWIan'fFZP_+<_&ioU7BR\<`Y8F0\:>B+]K24u65o97i=b^bMK4@H?6X"9CgO5$]1f!OErj6^?ET,@;IBE')t-t0`X:8Y)\!"G85dS=>.'u_\$3g1W?LtNYT[Hh,#,X(MA'G+!J;h%XEdaH('JRs/A<&.o":qumH;Xrp0bQe`I'fs@ada=NteBm)H*OlMkd6"*+inL_TkBg&a8C?8V;,dDFJOV19/m6pa2_V9802CLFCe<$m`W^q\'g;g\nsGK55dCfQ`%23-(52VlFIrg`!SbO<$lE)aO#M"FNmZuVAM8:Fgu8k\",h:$2(F#,8ou\47"5f`\$\2Q&e3p.^c0m)>`/g7&*t]b=#'h;-l`gb$qqk$OiL<70K?\*=0/5_ZYL9]H6P1Co>CioA5<*I2FM:HUbp.82E2kE@V(EBs)RQ:+?HSL9)M(86t/F"E1C:5rW!P%I:`V)/"r+$e>c0]X0<8*M8?HCql]Ma/*oE"/ji=t-3dpluhSN?,Q7b9:c39[Uj-m`0f^j+t`77g=IICoA.5AfLrbn1e87]@R&CTns;"/;h^LE;7#0TN#$n_In,IeUM,2o(1M[uYF8^o]sLo'PTI`KM`Kr#iG';t90B\B;c;I/b\:95$[h'10-e8ADj:=d^JYSoV(B/?-.Ob(egY"ROAUOa53G>%NRmYqL3fd#s@ba_V6#66r?glRqIn9T`ZKXJ``gQcZp$(W]=!0"-NT^GULbBsrd5po0Y_/VR]H3r?!.d)sM\qBLg/HcV"u`iJXPQ2W9BVpoojUDHtHr[O\,.5sbdqS*OP)/pjC^=W>n>OO]QaJ;AV"Lu.mL7W2mTlENCX/E;&?5#C>un!W\TZ\L.+LSZ,Ds,b[NCBK!"rnW]3j&J=sBAi[QX(c3OH#S$mY$#,/6O^AR7CeU2ded"o2=*#p@KTl`AUZZZ+u:BQHfQZqaqQ:Bgp,]=Q7]E$$e6."1(@"]\jEGQ6sB2GCP1lMZ9'u8Z.QapO_.@Ff6h=lm;G?)GBHpPk0EZU_5Gj*<':VEWbu#<)Z*j4m7Zp$AHkFW"Cc+_c[5noJ4nJGR1_+!8cPcDRJfPQC!"JCi:?\82^)"4YWs!jL=6*N6k[eFKGrujZ.sC-)kMl_K=!ue_UqK=TdLMKF`qKJ3:a`g^>g8sLTl#,9C)=CP^m20N-chhc%DrZj6ta2qMnduYp8>!Q8joS;fLn1I._-.iQ5Oe<$9Q+SZ=QJ4!\GdNY%gh\^]UV3`O#c*n6G7ol"[>(-U`hfRBC!3f.JcZ!<`2j?JGY01mK:J#r.j4B0WfpQ1;s9aX)&*$,S'+-FAKr_&L1lI[[jhYmA8N9\qp]0*\IV?]3oB>a(h!F^"hT#Sm/B$"I+VgRG%#:t1\M':tX399]t8iTKkCA2*k#.%#B_deYu_s70%0(t'<:q2HYE6)G7oF9A!T>!J\>I=DcSd3:(pXN4>SCcuI:XoWS)!S4*ACgL"p0TlK#(7;IW.,IB[& 
+Gatm?gN)(-&q/A5n7g(FEZbu?Gk'8Z&pq;,A[@Lsqln`Yc4$LWR3X+)8$`/D)n\'/!^eE><)Q)@IEhl?BQO?t]=Y,9m]QQ%$G6W=pX<.)'An`.)#bI/q>3Yt`["u\f5PYPp$\r*(Cb&t88m5.1]3u+''?GQK2;6leb/)Tn32ZEi;+[NYMc1fq^]FZl0Z-TMU@AZ5P"4ln%Z&T54r^NuNO_Gg#E5+-r7=pOi3)n*Zf8)[7&6DkF\Ccqd4KJXD\s+K"_HQk>Y3q+^,L,\;i,jP:<*)NC3kDf3l0Jm.g?Pja\2]r5ed)NSE(h9736W^ER*a_5FEQkdrE8Fl5Y2Q[n!,`:DG-:Fq$7"N*MlYMHc;W#ei[Fhg[+>ghblGX@lL]IEIKcn>%JYs;)DH*]^fO#6cs-EkfQLjV#L>^m1E\Y*n2=\?PQjSXX.Af67%S4nC&OLXbo+::[W`:FqCs4#_73:2BcP#S'-lhhZn<"QYVQk6M4&SEA$%Z"9TCl7l!'>cZ2F^Uk7W11q[cjVCbq>gn:ZpF(qMU]AdX4)sLJt%dnkh@2Aa!0\n'$J097j5*PntIc%LZM7RgU7[dCPT5K^]0$p8)<^0bGL4$gKc^s82Lak37Xoc06C?)O/J#L'kLlbf@YU<%,CabKOt2q$q;af5g3DE;l&"+YSsaA9&*Am7&!%6kk"tg61&KGe63hM8@&mBICnpO!jCjBjk4P_O*SF*eXC&X<#MNebcS^7a>=1f;Z]sg*F`.g-W?i9F+)tRm;ZD:QZUGGB0n9m,5+T%2^i!+OlAI:5M!fGkLhR!64I66L2K`/X7"^$#8Re+>>ZFfC5FZ,q&K2DI6NQ+IH/3]:.*fXEc<)J[fh:ei&F,1@Y_32qj&(R<:3)6W>-.@#W?"*i0m:Ckk)KTk$7dc;2Bfh?8Tl+FY?TQC-jKGXV@7Hlok9Rh9Q1T/=OI!HO0KT&<+ni=]#dX%d""[`MZ'j;fbE\Y`d/(omZ%$[*cl8Df-',\=:FNjAZ/09%QbG]N1aV[7GK679P9g6't\A4mLc^1^8/%[&M"J9s0`a6!UAc$$%%$G5%+3IVr;%WB%ocY(9"1RraZLs=+Z$[paegWK!%TD(m.@''sk!B<8O;UVA#mHSS12D4@]HFd"cLr4RiUrKsp/M9'TU74mPDUeY6>$_l=>996G4-t4oL`0W*.JCf6Q`HH\65WKV[BVf&I+^o=#!l:/bKUmcr`6.mmZu"7F[t4epdWN-&a'76qk7\!Ac>E=bdGUIM3CZ'_N@'%nf;?KL$>0[^9iSo#444M>MZ2_+Ei[C$1FqKf_dSp%C1K*(G\\bkL+R.J#M`O5Us)N.K&;^l/&'&9_Lo&No9WalahL^J4R@'JcKO?i/)S=b9*,SEI8F0:FV-)L%S3bQ3a>NF*Yku6-6WqorQQWL"h6+Lie#q$%?c8_Enoi(('nJ[iQ;2bUi1ll*Zg?9_FOB+1m^n3VGr]pP:sTER^-'MpV_AKM.*e,T=Q$+FA][J.J/@$lH@uQemn.I,[Qt*(>nftLM,WN1[5HS@WLS/@TAs$!i/u;$3NK<'3q(4#Nrl_6W_8Hr'[O[IO1^U8V8hT/^n+RU\Il$+nqb&4G_^Fd0hlLq&s-dAjR.t-Jc"FWDp\VaeGJgj!Sn9PTOH.TPPWt8\@^+p-Ya8(aD?\Kj*mB/_">d0K+L>X4O4A9\%/.)al;UFor4)X+m1m0h=':TPq%Zt@OJSp0DmLN$h88p/leb_>so$DoE5#s""-#h:](_Qq@.Z>$Pqm:-,V5,K>I43FKQnimu'`7NJA%-u]eiEKa1m2U`W:Z%"e%Q%eW3d_Q\3+Z8-.7*<1lZ0L`be"1?V`K@qBPZ2W9$k:N09DjXm[R?3!=?I[f=7g]r:e$8`W'[9W(:!IPfBm+3*7+-Vsr+Z.gXjaj>H#L$h4hB>>I`TpJ1n9h+tdT`e66@\c6XLX6R&72'>WE`ja`6^S,%5XpQ\>oo^~> endstream endobj -93 0 obj +116 0 obj 
<YqC.0iiu.)18Z?!YU.n.dm/K,bgGi;Q^NGh3]$6d0uJoKWH$^fTh\Hm"2pCmLSpD;tkJ0oF*l=+EQhtXjcI)Hg^?MJ'W35G5o:]qmi$YFqC9LZ'cG4nikg@74l/_uf"0hNJ)1XkKIf#Vl]Q`R9Dr7C7f]UJMFi!n:c#;==iXaBV+0*Es7)-mVX]Vo37@4@@VRHu*\q&HrjfVap4km3#sO5)gdJ^f0RHiKGsu$,Osr9*rhWi5#RqguZ8X22^!mkl-&\s<[Ln#\amVFlS[AFeYGK]-[gaFb^^eV6mI+Z!IR[ZM=nFu+;4d\j:mZc4-l3R9rGA*1aX;7OG>)A;;`8gS^HF@W9=so+>Mjbu9e,E^HcG1!/Sa,;aMUN.A7p8hrEjF![e8eED9I_o>W*.UC.s:!#GdeM)BG_%"jKQMHXEN6>0jeg3`S\Tfqr*ELAQUCtt[06un$PH,dgq*ik$IInCHBlaAc^58gHD!43U7d759mc'1Sqj26WZqm%0,GRKS^ku/q(Qn],PL(*ph$T@'!54RgC@S5#/#7pA;R[IO:rp.<0Vm+X,nA$B(*5tm%K"N^E7faKgZ:#4]V$Ce"LL'd!SZEN36ND=SRBT@pPnrlrOXnUUTOo7IKWnrgncT)RrKn`@?Dq=bVI;t>\cVb2+eC6#lEMK]7)BFLo&MORO'TUT-E6g[d&B*YR8+->1Sb(-rBpg#(P9jegUOLu+tb`NV'e&3h$J38p+;amV$#XH31rU9@Ec>h^^6tRpADTR($1EbVI,e0n-XMs4RX$G:F)A<>XtpI\(oCCs@ZRnQ';J0ol2Q;cFYKi(5Nn*e<^YT(&NWiRC6$@?*Z=^W-t+=cfi]U\e+J^Q1;C"-+]PRS`kYffj&MmDNH!eU^Aos?_j$e%G*HEH=An19fGf%<7^-gV_MW#^&[dP>kMHB_H8R-TX;$[Y@L$Cu?9/O6_//fJFhW#]Bn6HM4p=(WM0!?B(X(>+UT14`fHkT['DLbKj(-&#)=P@*?sLCV^m"n:Pl_@BUX&[82>YtSTP81@IDWQS@HdTDW`HSUK-LB=Z1;oWZ_VF#'5J\CE`-m4IsPJr7(<`DX]rDXI?Ed0<_:B=`8`@6IT>".,$*o;i8BeS#shTd1dN=Xg/FXeHOq*=qKf:e!r7RP3Mm#o%PV*0kaQ:%@XJ*m7Efki!/QJkgEXn]hes!BqET6R:[6bRTGR].E]NOF(t('$0E2_2YRq1'7p:Kjbh+/boql?6Es@FQ3Zt,W1!(lbXOm"5)EWDQR/m/6S*>Lq,)ZC9g^LRoUu9F'p0mcBp=CuF[NeXN1UH>hs:Q[\FMgT!u*l-'kgb_qQ\GIuPBjjfHif7lXbAM[na_/B__`([ND1IKsg:m7(9<=.2!\\c#J56\R=suNiLC7lV587am-*[@@Da,Y7:dR6/12Mb/qSg)8dq6h@)qKObSGeM@^!!gBWC0G9@:q1LhqsRin<$K#qd4i2$CqG2l]%n-6jSo-3n)n8oE,eZb)s"A.1U'#m4=@*BO$fM8=[A)c'D?'\\$!7OgKa9b.GFd2*ICs"$c`Em8Tjc:8&0'0=,E][ITs(rc!U\=gE+@X:`U[O^>Cp:(8m,.cdF[OT?e&Vk&8E5iH&%s$DQRS-Ai_/9Im4Sdh,?d)4t1DVYEI&QBO8W?Hq#_TiPiaEUeFHI_XS8]JOWT;>s!G0Vjq.Y+Upl!Q%-R/tO@5Q$a&F&qdG0nW.*<=IHZ1'on^4=7Z=juW`IE1edGbN5lWAY0.c\X(N5k.P)`bj,Mj$"CK!4Z*pU<3T!8@o#GKKfp/,f-m+r+.&lu6[^]qPV)A.p;&KmIOpJ>R:.Kd_h*h@fE9et8>5ZB!g4)>$9Sn19OtRU+"P8c4b+R"\.KBpN#LP@>EN6WJK<:!_3e\MSW;=#g8SFP8Rh'qOAQA''g"ZlV)C;+]@%=^5qA;5=!h`4rK8HI$*"3LW,peu_%G&&NF5!XUsal@Y[im"'ZTm%D-'&%nl",*&<"Dnnb=Q;g@k0?5$A[q6;Bc]nDC!EJQFau:;[[p[9&N_Vp-QkLd?E"[^+3A*eTAL9@*bjoW=XXlBS;"593>SSm7]5W$lBACOgtn[g.YJ<3ZtVk5o.AcPjD*.fe+">b#b9;>8[a`gt.flJR'(K$"Z5L5^^-D'R'8m<,`Bs5W-im?;<3uZAKn0!r&F$I%LQLh+2/!N`]R6Jcdr`EUJA:Eq+]%c>l#7bG)jqZS)#>^Xpn'l`3/DWE1>Y?5p^7.\MR/!u)qY"a6#`4&3X=YVhY6fU7DA)@C#pk.i`j-i;$nF.qY:bE<=B!PJ6UXqaFBAlCm21ss^-TL(jL$Dnt5eK[dQ^$hj;E.sp<#D5Rm1mi=I=jf=#2hKf"cfCls_N2Ga`op)ZU(Ao2mr&A'92uuYnp4ES\::ti,SCt5`!Ir;T9fF_D>C5,,GMd`hZ0T\**q"["/0/=lU/CRrVEMJ`*#?[t@hb2fimGs;/B@@r4[IArc0_]P&So]!Sj9BX$8kI`mB:>7^J^5o^"7=dJAmR?EnMd%f`F!'/nW0g@9Y5,u-'u>cc"rDIT6]\I(XDm1bfeUdAh5L+9jKSj"N1-l$e>WTlLOn_O&P9uP!LiV)ElNX8HBp>T>%uF?e?+!AHR+!&2WIm7uW#qc(U?UcbJD?iD\BO_7ZMibDi]&/C`_=piBjmOG/0[%1G22"C3SkhDlG%5,lV^f0J?b'od?U%f%uF7!YJ7]3Cg76Ldo%#p7r]<(]IXfB9kK*$KM^+RIh#BLOh7o!9XTV`mWcr!foC8"n:NREROu=%+M"%0(8Oa2X&[hHeaWc*&i%7]L4pDbVK8!SrF\.SY/,SqGl)O[bpu\5uXq%q!0#o2>Qr3(VG%E`,@P])RO3T@>#)C?aO5KnrJB9Lc1;Bb.k@jq;8es&!$PIB7IM)>p:E-aN$C-NG"dD^R\&e0HSHSJ)fgZNt/`>GekB)#CKVre?k..Ib)H3n.2.Cb_IBME9Mj7i*U"bWFts]8aE9L7/4UuL1ui,0C#1ZV$FV3[ReSZ=G#W2j+rHN=6b=NnZMQP6Y:gZnAJmj8%m8ofq-2$?Z!Fe^ek+n?Q=+>MT-73%%f-sNjVOToC4?o>Tc,d(Tp&2J4=p8oI,g1_H%B9A@MS%IQ6irqnF=%a4mIUQU(?)`H_oB$1_qca!::C]mK6"^o%VdFq)$?UR:Lf)S^ajKsmtMcFHd@WJgjpn4,S0^WJdX:Fq/[E!j>[08P3VcUP5c(*]SX?5YYTGuP&@C,ShQ^r35s&K`aCAP4/[>-N0K+BN"><5$2IA7N#5E\EWHIorcRATgN'EUu(0iY/'Q#=jJ-l"(3q,T6a8WhshBfOK)9=qu(B`.n(Igeb)b%OFiegoe#U0:m"TH(+7!P-(]Eo?>tc+B2sia)/R'PDS_?A&b*5/RfIa$k(aNC]=0s2rgY8Khq1m2&3RuD)ZPNRS$4J;L48+(+]i/dOS4M+[6MW[f4f%*F6sJQ;ks9t>qd7m1,EU]'Y-JO;kPcJ@4EaWbf^8M$cYEB@giqkoa&bA#FQG^':5^,los_*j]$06:=.)EFN=@7_>gAI)j'[W()7d9KPW
t@2Q'qOPR)/a0jr]HJ)hTVj%#[!IoQMW-Nq##kY(u-GC2gFi<$)s.kr5k>F9i!*@8oaqmt'uq8=m"5GA`e;L3ns:"YpNE't;R?i-&'?PS)!a?R]0Z?)$D,21%'+GSqn'qo>(rYI[Sqlj9VJ.^uKJWODaPh`OuK[:G#(&5ZPg0i2POGZ(13)R3hf5TV1`=4f`\a82coF($U>b)]EfF+OJN3TsE*@]Dg7XQEH=<:kJD1eE7B8p[jF)YFhN.TO\\:=o?Ro'aldt!oOK"qQ:i5gmC9W@G.m0ed]HK\m`atec]E)T4So@QOM,R2&B?lds@++b`DV9n^]/kp:R,:!O*/U9k3^m,nU]%+UK)#]OR1Le>`LOd]E&kc70sag4!lU*-M$5a+7tqUi)[J7N#"J*EFtrtIiQnB'C401u2@=UA<2$g+t8E@ZoR;guE6=[9"bmq/ufF_!364X^-jT-,A(>P=@&,m.B/SSPX/cZZ;Ip"DFK&?]f!$bm&^-JODQjBSaK6\la`fcf&><%=iFHjXAA1#<+B8V'!OtEEn9cqZV^h`*%Z4L>KgTk]SBj@1U/<[2JdYU!"H6`#.KITmTA$hk-p#qB"E/IaE*ICZ>Z6)l<@\M?@\(cn5!o8qU!^CDd7>i8k&%K;b@tEOcNqDr:<2+;3iGuH@uX6C5W^[47]Q"\NL2c*`F+N,'k=(l\:'AEnq*M(h&O=1;2`bjf3AC22dA-CjJd>SSVF)lq#'?_ZfBr-p"-7BO+7,5SW>lArbl%?F@e](33.HoELW!qngc(+n>.@Tr).q5;"A_6,9@6igqIH?9+G,pkTKA^#c*TG/fS+>r$VW6s4rd_;dkQCBPXe^$4W#18uaGI^Om+e?48lc8jC+#:1;32qlbHi?RpX/^6XP'\-"fpHb[>c9>HC;&.B@^(`5+s'819bE9.M.?h;*oh5+ru+2o+Z;1XrPIOD(_d"p]G'HsC]=O/i01Q6-h0F0#T!1N!F0kbiq25RCtpj*N:%nM4)kt1UN:XL6(U__a)`3e(g;I/'`U5/IdJ<#T^D?'#ln#ZGg(^n)tsIe\9Fd$DqGnAVJe(aE?t_DG\c!2&IS*s(poS&^,_hoSc1`84iikihXTSFnTH\tD'XU,t8Tfc43h%mk9qp^t9$EW=4ElLS"*]KhW7-4$rFdm.`daAD,3%-Chb]Q`Sd#n)*o&;tG=CH^g"d=OAo)b/81`*fkUFRAYV\'&N:Fd1326m\(u[/'J]S\)7X_JrWZU%SSH@>5AY;W_](g#JE'Hf$t!Frm(c^HZm^jiqOo\:pcTbM*fkVmc6BDE4TF2\TDf=c3B+*I$*W0GZS;Jh!;CmG#*f)d[#[B:q82!6mgII^1FkQnaP]JaDeD91*B0I6)s@n,i5(%N;EatQm6C;)"^sV-aoLf,;?gOtQSJPf^Qdc04eG"k/_tgqY52rVZdJs.$-)+VG0[>S-FUK%N?64@-%3%OJ8O9C\UNcsHcG/YrrP>8`"d.4DDDS:8)!Kb">aE_l*:%f.=%k1h+sIn5(C^;,7W`iCE.i7imOg6-Z?;4(O"dd+MH_2ok*mL+N>l1SXtoOmPL/$h%Bm?q82F.[C7r`,Ai`S,fV+ROB0g;jKi>uil;Q)A8V<%s2bY\RI&A:`&AKb.lDfBr0ieq(Nl[>6qK45^=ZBKMb)>&[T7]]#h[BHOJ[eVsKE4!^I@d0@`tE17$7ZJMo5_-:(5##eQK9;`hm5GTq3d"mjkuXQAq[k9:bq0%8CIH2Wi-87'D]'cqoX_kp4t9Vm:Eq=V,0hUPdbQ!],Bhp3>km'$Ha8),VeRgEMRC^@iTec%`mNc3_*0kI[6iMIh'@AP17kY&q@5i.JnXuDK/YQG\,V3s/OG@V`b8'lVdD4b/i1WtUXMrnDF4A6o?h4dq_6eE7/g=f_aeDl,Y[Jd9Q&'.H$Kpfmp!27/**lCYPC+XM`"f"]^GH!I:?78sbYVY47BhDeur4e\.M-7.7dm.Rj-oiKjs=dO]YB[HXu@jFJeLURlmWG!]f;Ud.-B=&pjgEAGBBq0(\@C;>%9[]0iYM`aCi022YQ$?T>PmFlBS.m.sQG%-[Kl'H,O`CEsu>'k)CqHb);c"BA@SHmHV;klUIQ#I*6I+;c*hIiX/'b62`1=4LpZ">sp&!**<2g"VpS[$G5Q!ILLS0g[#lNI5drlq/ot*b<'c@S^cbfNgp0"fb@ZqZ(e5$A-3Ub5NM(BgH?/Fr8>&qXl6p(@nD(+tcg2eaiUd'R!b'H\tZ"'R'*1U^;jj90f&!*FJ%Q]L3;(';oGQeH`=Jh[nCl!2ig)Dlu?W-rpJ6XH&@/pI`bE1tkK4B!rCa1To;l,s6l%WUNoRfE.kQCG^j-:^:T%8c:7"lrc+bS:SB#kqCtiGtXj!93cUX_[_B3?RBS[P&'+59fq[A44%-lNuEu+nhlCE0EpB;bCd2:p-L4hCZs"8>fbTA\/B5E!?1)%<@qfL00PZC_d;Pcs1JO]$;]/1[_7uY'X?LYq!HW%'FY/m;0RMYK^;QsoB4*qceJ>njC*O]h^\f:%r)=BJ_HH5!;6HG:RsNHqoJp:]HsW=0=>`SYbM_b]Of>'F4o'JRd@h>mlr7/:Qm,mBrgtX/Dg!Rb=@VB<-PH,l!R;N\K!VMe-&R\#J2oU`F0!^2cc%qb5YY+C@36]!f1]=i,N;XUm[aaBuL8\J*WMhmb[eRHAT19E;CSlSUj_Y$[@UhWZfsWal^>g3k>Vkb677!??=BDgi=^GrbKP7agK&0#2Wgf%B-a]p.:nb%.L**g-ZO\hV6gh2cM3W_]#2hkL!E@NO[q3IA@-Gj;rUcFoh,4oWPc7>>$D:>l'U8-7Dt<3KG@)N9LVY0`AT`aRc80460s2rh^t4XOj=@)#]ia2<47d37SC[i&/?@CPfu$WFLo5G+Y%"_%6V5,tW[]*d2!Q?,MF,*OjkF=,3e-Wf,4%oG0e**'_NN\t2T=B+Hc6=`R7l38%k6m4!kbJPs%^`g!c8qRH1!qm2ToG4F:kh>d1,Ckm&%Qk(DnuAdRmE"o1X0udqb8'4eJCliZ/-pFBr;`di;&\92/@]U.9#E.h0L5_14SkJKKZP3D5J$soor4R]@%a';b^GnG2kbQ13F&aN9)eUZ/>G2V"qD].foa`:-WL7`o.F<92@&)Om/.aTJLDrfd-G,p'B_'PYLHua*P,V=M"r&ieQls5rI\$qZDAg9CTg6(s`ImiFp"V!p6f:hU6,OSHnk.<[S_:'cPR-3/E7p0 endstream endobj -94 0 obj +117 0 obj << /Type /Page /Parent 1 0 R /MediaBox [ 0 0 612 792 ] /Resources 3 0 R -/Contents 92 0 R -/Annots 95 0 R +/Contents 115 0 R +/Annots 118 0 R >> endobj -95 0 obj +118 0 obj [ -96 0 R -97 0 R -98 0 R -99 0 R -100 0 R -101 0 R -102 0 R -103 0 R -104 0 R -105 0 R -106 0 R -107 0 R -108 0 R 
-109 0 R +119 0 R +120 0 R +121 0 R +122 0 R +123 0 R +124 0 R +125 0 R +126 0 R +127 0 R +128 0 R +129 0 R +130 0 R +131 0 R ] endobj -96 0 obj +119 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 651.6 367.944 639.6 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/333) -/S /URI >> -/H /I ->> -endobj -97 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 638.4 263.304 626.4 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/334) -/S /URI >> -/H /I ->> -endobj -98 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 625.2 437.64 613.2 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/335) -/S /URI >> -/H /I ->> -endobj -99 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 108.0 598.8 186.984 586.8 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.us.apachecon.com/c/acus2009/sessions/462) -/S /URI >> -/H /I ->> -endobj -100 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 252.5 418.147 377.5 293.147 ] +/Rect [ 252.5 537.4 377.5 412.4 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.eu.apachecon.com/c/aceu2009/) @@ -815,10 +1017,10 @@ endobj /H /I >> endobj -101 0 obj +120 0 obj << /Type /Annot /Subtype /Link -/Rect [ 308.94 418.147 410.928 406.147 ] +/Rect [ 308.94 537.4 410.928 525.4 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/) @@ -826,10 +1028,10 @@ endobj /H /I >> endobj -102 0 obj +121 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 387.747 201.324 375.747 ] +/Rect [ 108.0 507.0 201.324 495.0 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/197) @@ -837,10 +1039,10 @@ endobj /H /I >> endobj -103 0 obj +122 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 374.547 186.012 362.547 ] +/Rect [ 108.0 493.8 186.012 481.8 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/201) @@ -848,10 +1050,10 @@ endobj /H /I >> endobj -104 0 obj +123 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 361.347 243.972 349.347 ] +/Rect [ 108.0 480.6 243.972 468.6 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/136) @@ -859,10 +1061,10 @@ endobj /H /I >> endobj -105 0 obj +124 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 348.147 231.324 336.147 ] +/Rect [ 108.0 467.4 231.324 455.4 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/137) @@ -870,10 +1072,10 @@ endobj /H /I >> endobj -106 0 obj +125 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 334.947 362.928 322.947 ] +/Rect [ 108.0 454.2 362.928 442.2 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/138) @@ -881,10 +1083,10 @@ endobj /H /I >> endobj -107 0 obj +126 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 308.547 243.648 296.547 ] +/Rect [ 108.0 427.8 243.648 415.8 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/251) @@ -892,10 +1094,10 @@ endobj /H /I >> endobj -108 0 obj +127 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 295.347 321.3 283.347 ] +/Rect [ 108.0 414.6 321.3 402.6 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://eu.apachecon.com/c/aceu2009/sessions/250) @@ -903,10 +1105,10 @@ endobj /H /I >> endobj -109 0 obj +128 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 282.147 391.608 270.147 ] +/Rect [ 108.0 401.4 391.608 389.4 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI 
(http://eu.apachecon.com/c/aceu2009/sessions/165) @@ -914,14 +1116,47 @@ endobj /H /I >> endobj -110 0 obj -<< /Length 3007 /Filter [ /ASCII85Decode /FlateDecode ] +129 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 232.644 257.294 286.644 245.294 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/LogoContest) +/S /URI >> +/H /I +>> +endobj +130 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 127.992 145.241 189.636 133.241 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.3.0/CHANGES.txt) +/S /URI >> +/H /I +>> +endobj +131 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 419.592 145.241 491.232 133.241 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) +/S /URI >> +/H /I +>> +endobj +132 0 obj +<< /Length 3130 /Filter [ /ASCII85Decode /FlateDecode ] >> stream -Gatm>99\/>&\d_;i$kq3Zk*3=I-?f['If%ea2tLE6]qOa#!:"q*,\<-7f)jnnC[dJ$ob#'a[L1%?[i(P$-_.ZRm0*lr:bpL8,2iK-DcA";&$kHL"+GX16b2L1oOEY]D:4+gT(CL+9HuiqL]1nLHdm`hU##R_LF-#"XPN.'u<6Tr:@P_]IWUIqr!ca5i6.Nq,s9BqWg9!j1k!2r;9Zf:P\CkIg6L6mWnNKK"ldDDKlH0KE.j-#Z+\/>R9@a&Q"Y334K+c4o%CT4pD\h)$Nm@:+i*'#-B]TlWccp2FPJT8/!r#`,u\]>h^Xf,&?.P:"$4Z#pF/A,kB9*,!1W*ZBtKuHshklB*Lf^P1^6bSrc[Z=4s:g:PP.ao:RnOVTGl;1V_/]o2!nL(o=]Hf([HWbHAfM>$M52\TQVC5IEqC-gD,H'RW4$:,"4Uh,t^_fLb-P6AbG8lT[Sfm(;(/&Pm7$ut<[JJ7&s&7@-G7&Z'a1_O+fc0+6Co)S>p8=tJB)n.dZatLgbo;g113?m":$"*Crlqr$>*,86=l-TZ)'o?(4O4q#[p(Gi:^U?L6r7-U6tGLC/qSQC0A]+<;a#[(hH^BeNPCcnY/GAm;?!,-2PE^L'"*>1cA.NM"$buh#r=BGP@WpSMjt,d99JPeg3Rr!MkZ>F%XA7J.%$YiEd'^=)cR>/ZV2I'?CR'@6^%D&^;I5N-Ztg^J$$Oi:Q7.@8I@YCMK,Yd`^Yg>$ZJ[N5L&VQ6YR`s0]kcRjM&9PrBn\ZpXPPa'gcG]Q1#O0%^hY9.+[]joqec6]G;nr?53CK50Y5@1KZt32FI]D>C%D=[Z`-!qmNM0hJ<:H.L8[R<=9(*!;\aTbsTQAf@`onM!_gUHsZ.)W#.eI[j]\^+eu\LW9DQmCFg(;?[E/oVXp*B(Y-5HU4th$0Nia&22lg-EFH2P9sPKI$U0&iNF.+==7T-t;MK=US?4MZ'Q@&?uZF0p=qZH_'OLMV't+-VD<*`\$Wh,$r0io_OU[H:2gb_\8JI;h*aN#&fY4!M.U#07('Nf.IGD.ffSOoO`7:?/D.p%&p+'?6`&:!d72<+CpL3//MYIZ6]@KDl8AV9BkXIeBE,IZrpUBA;3#&E@DIKttCE./XdqJ$t)!][cGjSTep,$TTHaUJoaA-U7CsVaq0c+Yp>:?qN'64E@Y`N(8Xi`WKb)TWm\:E5aFa7Trb<U`dZc"^^*"B3idbl;=-(8o)e\M5]2YJ(;?27*_IRlW2SRk/+9rU5L-I:o>.!/)YBL;8fFWXWpFT-r.F&Bs#%(/oB*6J(Enf8N!bE1RF>CDPFq?O`\lt<-c]?)T4&j(FXMI!]i=Di]WZ1F@'JJp:2ePR!H_gjmXS1hsGVbn[Qq#5]\=70Pl)<=eXePN+NF29U(h,B:hs.?DY;cW0Mf>F21d/(&P-W&?8-1!@t2=?1*O'!UkZ;7+RkVV/ibEWYA&U\rNMuIU=?*Brum'<9SlV]i#`=mZ5R(*09[^DFLFUr.qp2SW`AM%;Y&0]ZG$laFdP)/?e<8H^6XDg1tM63%7YHfI)ZiDA9pE@:HGEP@1mtY]YFflL#g"dfrFY3&lQjo2)^:q6PG7qlBo@=fQ/E$Nma/`tB;TFG%2!oaYFIheEMV&CBh"k$VKrf3B\6mkY&tI\f!]#IgY1&q1&RGFe[fro&^6r?]o(&:'&1KI)nJ8j&-oQhFZ+X\sBq]eQSd`EuH^n>i8-TVR)'=<^n?.*:>3Er(Y$HqXRR:hTC`\Q`8,[*\HKYU/KoY3H!ld/@oPZuN>n7Jleo+_Vk]AasX;Y)g 
+Gatm>gQpU&'n)^Xi)>i@'lf$K07FkP[HjV_PKY:tB`#T-+Ghs).O6m<^[D2A2ulO+dq3mY-6\qn2o'J:U%]'VIsJR"rJ#P%q"s;&$2hRH_[uh/6="L4j*i6dYEIc&btg:eH\9+pP;lsAqO\FqrYIj5HX:jaQ9Y8YJn:u7Z.;+)rW:s,5s88>]K_XGL)qlKa9k/U"G<&pn#(76ikgPO*r7bd5:\+4nbUYiD,0RT>\in1Hj[g!]+J8>O5H7tcVtqA$hql7pF6gr$TC?N]/e'"sgZqG0n:PVIm1gAc(S1nc++`O^\@MQC;")`lYWr9(f.E)h3'7d:&YrX$DIQf4F-c"cmLajk?^aS&14M`"QMAXg4BJhSVag*u2:>Fk<4%^R"Yi[f`5%Q<0nmY7iJWAL,C/SfOn\4uG8'8$Q1GFU)*uW.d$'hE[WiL-lt2hU`&:!l=#=6<(_r+/8FeG.93NoL723=iV@]F)_+Ck/lY)0_Fs5;6`F6dnRsm<.)@aSLF3KBk0RYf*Xi;.EL[n"PNtOl(gI0qfdJL(:\c#&7]KbI@@m/hd-l!+_:VaBf\!\o'_P7/%)A^%uS)Z1Ar56b3)[[lr')CROZQWg$,/"C0B.kGs^@Ij/j%gN.99B]K'![PAhJ3.YF\dS*IDEmcs3YV5>mtN!e\HHRM[fG!EUusK/-CEQ2LocP`2If87;?*lCB5rcaA"OGLhm@nY8F;#TOuV\ld4[#RWNOr`ff!;#p8=7X`J/LD.0We@b@Wq.]q?=Q)?4bHUr/NN&P2rG?p6^rc4&L7Q.^aAm[apTP:=L,,.2qRjk$M&t5A:jNh#GkM]hAE(%%fQ[)n$)tg4&o.NX/HJ2)o)udK:E&F`5mZQU!g6^@<6sjM/I>.*uj_6@u#uBdMrdlkDnL/iR2,k?==G(=L')HK#pEsgW@#Xpr!$Jf`iA5.uL65I>E^W2Kgj.:V;7s0'G7"r:&=iM=GjqG-=d'jkWg-oh\J871"fFh@#:,6QfZhX->_(93JjUE_M01JKO3919sN?Irn)s*7'W4p$5)#(OA)0>1hn%*Ok+-l?#VP$;/j='7E62U.S?e&59t_FZWh&/KeP+l_^.\Uu2/HOoN9p:-pK6<<8(`6C/&O!-Cad&SM`]%k-Od$lnNC!QkKNddKtF,)e'":l+^=lBa`.9>1^E3jS%A9I`9h-:U!b5`YE()hs78_a@,n)NEu]q"PgBc-(Es82)%aGkfCS+0rq$OIkcpEDc6g9(&10s,HmIBL`maDFj_Y<;ree_=i"i-HVeV=lUK[L?>YSlr[=gq=b0(\kJDk*;3-]1hRK2+aNjK0T=K2u4hg>ejnQC@u,kV.T1[/mi8DNGtSQSI%5_qqm4l)U7C;l$sN`O!p6R)"SO\>c>:_M6t_MClNE^Urp?h6&//\Qa3a$cV^(5>'o2pJ+mqo5u;s%].V6If^e5C"G-(^ArDo#1%>;lHn7]AV\A:,CbkgRi%0U&@=RgS\^bT$f/c+*5?oN3af3))UJd:sW2a/J6XGJEmV6-hnuWB)JqA+0&idrjcNM.u0Yu5(k\@mWb\Yh'FpboBrZb=J5MG#JuD]k0Z^Z`KY6m:n=O@TJ_1o7246,)8J`G7B,$AX(-ij6(A%T0Dmrq7T0@\6F93$RXh[m0Tb:$/[]/I7U5,RS8[L(2s6?Ul47!"'o7&q0Z\4r#Ppp-\V:=n2])0']us9FiIiKTq[nM`o-Bd-:o7WuUe*NDOjSLM7/]i$m!U endstream endobj -111 0 obj +133 0 obj <> endobj -113 0 obj +135 0 obj [ -114 0 R -115 0 R -116 0 R -117 0 R -118 0 R -119 0 R -120 0 R -121 0 R -122 0 R -123 0 R -124 0 R +136 0 R +137 0 R +138 0 R +139 0 R +140 0 R +141 0 R +142 0 R +143 0 R +144 0 R +145 0 R ] endobj -114 0 obj +136 0 obj << /Type /Annot /Subtype /Link -/Rect [ 232.644 660.8 286.644 648.8 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://wiki.apache.org/solr/LogoContest) -/S /URI >> -/H /I ->> -endobj -115 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 127.992 548.747 189.636 536.747 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.3.0/CHANGES.txt) -/S /URI >> -/H /I ->> -endobj -116 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 419.592 548.747 491.232 536.747 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/solr/) -/S /URI >> -/H /I ->> -endobj -117 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 252.5 370.094 377.5 245.094 ] +/Rect [ 252.5 503.347 377.5 378.347 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/) @@ -1033,10 +1234,10 @@ endobj /H /I >> endobj -118 0 obj +137 0 obj << /Type /Annot /Subtype /Link -/Rect [ 308.94 370.094 410.928 358.094 ] +/Rect [ 308.94 503.347 410.928 491.347 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/) @@ -1044,10 +1245,10 @@ endobj /H /I >> endobj -119 0 obj +138 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 339.694 201.324 327.694 ] +/Rect [ 108.0 472.947 201.324 460.947 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/sessions/69) @@ -1055,10 +1256,10 @@ endobj /H /I >> endobj -120 0 obj +139 0 obj << /Type /Annot /Subtype /Link -/Rect [ 108.0 326.494 186.012 314.494 ] 
+/Rect [ 108.0 459.747 186.012 447.747 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/sessions/91) @@ -1066,10 +1267,10 @@ endobj /H /I >> endobj -121 0 obj +140 0 obj << /Type /Annot /Subtype /Link -/Rect [ 125.664 313.294 266.964 301.294 ] +/Rect [ 125.664 446.547 266.964 434.547 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/c/acus2008/schedule/2008/11/05) @@ -1077,10 +1278,10 @@ endobj /H /I >> endobj -122 0 obj +141 0 obj << /Type /Annot /Subtype /Link -/Rect [ 179.988 208.441 273.312 196.441 ] +/Rect [ 179.988 341.694 273.312 329.694 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/us2007/program/talk/1859) @@ -1088,10 +1289,10 @@ endobj /H /I >> endobj -123 0 obj +142 0 obj << /Type /Annot /Subtype /Link -/Rect [ 224.988 182.041 356.64 170.041 ] +/Rect [ 224.988 315.294 356.64 303.294 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/us2007/program/talk/1992) @@ -1099,10 +1300,10 @@ endobj /H /I >> endobj -124 0 obj +143 0 obj << /Type /Annot /Subtype /Link -/Rect [ 230.988 155.641 495.276 143.641 ] +/Rect [ 230.988 288.894 495.276 276.894 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/us2007/program/talk/1943) @@ -1110,39 +1311,10 @@ endobj /H /I >> endobj -125 0 obj -<< /Length 2923 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -GauHNgN)&i&Ui84T`!lQ>)b6q>l81qCWESgf?#NG*]ug9J094tVQ":Y:@],bCAj5fW8E3>>bLolm!j6^Zc3h4mW%f5u@5<&UqD1i0_07^&:rHR)^B>KNqp+s1a_hp9D3<[9b.O3N@j$6uo)&ViHC2A>C^O=6-:Y'290PdW[8a&+3ZM]_54D\ZBoY_,=?6O!!+[i1pdUO5P@AL%SYXP41Z5c[DLIPCd#Tu!RVq0r*Z]_nQ\PuQsg8/8KZG=e5LS'6aPX*'W!?rVd]qa_M3b^EY^3jX@#:srZ6N[]9'q"OW@R-iA.6KPXtR9+*,a+qj-,""f`:aWe$WU]<1Qc3[TebaaKCm)@X;'jt]&.2Gp5^c(PS#^mhXcg7E4jMIPOV8c1RKAPFa?C?fY@^M&9$iC>FY[0=mV?P`oVQ>CfP>)k3;qlFf/gf_kD_mMqP!G?rb8^OfcnYbH=.N[U7%D8'&M?iI.kd-JqhT=o&:RjM!&4S"$o:eNnW&H&D?4L)!J];c2O&b95ugLpu!Y"JN_:cWLA/@"c=E&1U454F!Wc'5"R#:)UF.0`tY6VT-`Q!JTr,QogYA5U-VF?rCRmZ/\;8(@.!Y&Aj-W@M80<)8ZqCVMK1te>ASJeUtlC7n!qmc*@4&GH(iJqlXD1$Q'MhbN2o\E*(%KXuVY\)Bu^eng5fJYW.M'YAnGA)*LPh=,b>V>U,IU7;gbr)#Z5-$85KjsMBT>#*.E2l/"L+n2WO8QeHM*PLd:hfomg,eCLLm)5[Z;ZA#@ZT,a!U@/MU3rMk/Y2TnZFp#eWE6R7\WQB"F`iH7A;JqOgX8Hr.bKhh*51IUsJ+9Mc%JL9H3",TcCKT^:W8VsM[b'D:9XtkA:\BI((,LYc4bnuNF`9Xb-(1JGaG#FXn%t>i)p6/7;>t]`Eo@(l=s[?p22);V#f2,$l$Al%0h#p1-Pm9"Fr%8CRmP%GKFHIBb02&g.1QWFKP*(c:htk!psVc;:lcK^p#D?>CSaH'M\FNWX?F=7lbX^KEZG:(asCA7s*R.ZGiAIN;b0XiMt/+0]@[Fa[P]/p>]Tf\;:$NCmBEJS+mkgCK6t#*i:+?4!<9h.pajiNMGet/2i=B*9XFc$-8!^j#+HKNq:(FaJ]r)i+&2`;jQRI2H8>0_9go=h&5tJT!+eOM/t][[^YXV=+i;t\bH>ccj*`<6pq",UWT"=Pk*$Fh2)pcTZhi-EQ=?Lfs0L)44[$Gq!]gMMLBo?jV?B:<-%SJcti[.Uj*CE'XW`W_GK6k@5ORFjH\RcSnRemAe.\V_N.LfMcj`7Sq:&@."kM*N-"qr;nqU`M[kHdt:.RRboG0[b@%i*aL$a[7Pl&9EU?-^'Z!0l=,g8G:omi!t&B*Y.2gN(tCbU]@k&e,"6/\cm/d0grK7.Ki:TN?L&DVtqoUQBb%[NKc*k'9k[8F?$@+EL9+KtmB+'c=\^#_'OAsp6?g/IpV9m$mF!pWqkt)dWN&Q#W!HeEdR'95Z8c>i9jE4;pB=bYWgkFF2qbJg'\.2Dq!W)mgik`_/.6@?:adGItA'XXOZa^)2&[W?O:Of0hKD94l.9aDS,]10#k?0]=o`fomK`44)aeYAaL%k+L<]j\r5-`Ks%+@L]2ORR\g#c&qS#+~> -endstream -endobj -126 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 612 792 ] -/Resources 3 0 R -/Contents 125 0 R -/Annots 127 0 R ->> -endobj -127 0 obj -[ -128 0 R -129 0 R -130 0 R -131 0 R -132 0 R -133 0 R -134 0 R -135 0 R -136 0 R -] -endobj -128 0 obj +144 0 obj << /Type /Annot /Subtype /Link -/Rect [ 225.66 664.8 364.932 652.8 ] +/Rect [ 225.66 262.494 364.932 250.494 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/us2007/program/talk/1953) @@ -1150,10 +1322,10 @@ endobj /H /I >> endobj -129 0 obj 
+145 0 obj << /Type /Annot /Subtype /Link -/Rect [ 225.66 638.4 480.588 626.4 ] +/Rect [ 225.66 236.094 480.588 224.094 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://us.apachecon.com/us2007/program/talk/2017) @@ -1161,10 +1333,37 @@ endobj /H /I >> endobj -130 0 obj +146 0 obj +<< /Length 2396 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +GauHMh2g;K&qAfR6HE(51lV2U;136/D!r-p[$Po4'R+teA_L0790Rd*DssmETk'!Ia`P10:]q_nZtAGZ_tg+B?erD"h>QMs#!;VWnLb,>(dq4(rsNP4?].I;R&Zh1_>Jq&2k/@o+n2WX2^pL*YJH]_5N`YT`)r]5;KH?!<"H[R7)E=Fb@LED.1b$pMZ$tr#FJG'Q$L_:1nj%^2>7*.DJ"cic<1NgA=-LmB06;Zu7Vi=dI9T0Q7Xp\RA&9AU:+VBe5/1/.G.0Z#4]U6uXJ5'";6>l9lm3;Q/\&hpjiRXs:]86Xl-BlPG,]XRHOVIRih5$V/9P(fKa"KF;k45JgoUE$i"LG([8.u1[Pj=X^L[Hq!?-njJ?Xs-gED+5.TB,:^D[C<7ZkYn/JO>HUbsSOa@HZ(PIB4(iD`-.di&=X8&(1;<0T,@4$#*[LTJ0la6hZ?Hia#Ve7t"=O5BJ3#1%@]Er6/gj*#^40VZ;\j#PaB=@:!Sd2!s'2m?S,8_R4%n0YS&a&A](J(CnEC;<*"QD%bbN,go01m7DhL+Il>[nt^e_]ri/m50a5+l1k_Zqmst/:#h4\c5;[K^,Y]$KM\7F)WRB.g6q1gfT<-$gm<2$"g^RfV$3a=C@,>5UqYOs8\upd9aDaDJ3==4a6h/H[;"WP<8LTPZDc3rHkN$k$?r>ildD+NjIAXY6>ItH/8U9)U^/iR[jH=B!1#P'D5?^fdZuHZS:r^[2rL5T&RKP3EZ(n#eVK'bMHM]j>QusBDD^]N\Hm_D8EYA[UuZ)VSJ(u2W$2he_5`e8U"/'*cQ9TEE3hro\/.OVImq_RO#pZ6B0+_`)6RFCY[=u!SQMVQ;%!(.[g??.UT+J`hr6.&B1>+_9nTKUmGs%Z7Q4&Q6R;`q)q0K`BpIk!BEJGiTH`"5ohSmRpFesq_)[i3-">Y:'J0\0;lmI;)mu"d&'8eNc_3d[`-'+[Rl^MET(O8(!"+D]@@Vn<4'lKBDFpFc$W?1uqNPltlr%%PM*S7oan!Y"#$rZa?CmQq0[7O7JPKNj&+Au8+0$=pW.?C./DB%A+aTC.f]kFiV[Cn4=/XYkD`bqa]hDC+o&)gu$3&YG1]Blc#Kbd]RbTW7Th?FkR))!C\F(]F:*rO\>#^";YNWrHaMq;?572]<(h*%RWOG+Y*b3S5t)\p=EYaMs<_0<,p_&rru\>On]!bb"drll]Y?)6`Ake2YQ&998EHkOKq?jba:>bZ&s*s79YqL\:W"?!+HNa/86j.1$U,FV!LfOnB"b=,ch1--J=eTOq)=MeY087R]35YM4=mR8&*q8NgRch3mG(cK\`kdH#pL=Gjfmr[2e]>jGho%jq1%B]h1u~> +endstream +endobj +147 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 146 0 R +/Annots 148 0 R +>> +endobj +148 0 obj +[ +149 0 R +150 0 R +151 0 R +152 0 R +153 0 R +154 0 R +155 0 R +] +endobj +149 0 obj << /Type /Annot /Subtype /Link -/Rect [ 127.992 511.147 189.636 499.147 ] +/Rect [ 127.992 639.6 189.636 627.6 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://svn.apache.org/repos/asf/lucene/solr/tags/release-1.2.0/CHANGES.txt) @@ -1172,10 +1371,10 @@ endobj /H /I >> endobj -131 0 obj +150 0 obj << /Type /Annot /Subtype /Link -/Rect [ 254.988 330.988 457.632 318.988 ] +/Rect [ 254.988 459.441 457.632 447.441 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.apachecon.com/2006/US/html/sessions.html#FR26) @@ -1183,10 +1382,10 @@ endobj /H /I >> endobj -132 0 obj +151 0 obj << /Type /Annot /Subtype /Link -/Rect [ 311.652 317.788 368.304 305.788 ] +/Rect [ 311.652 446.241 368.304 434.241 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.us.apachecon.com/) @@ -1194,10 +1393,10 @@ endobj /H /I >> endobj -133 0 obj +152 0 obj << /Type /Annot /Subtype /Link -/Rect [ 432.624 253.335 489.276 241.335 ] +/Rect [ 432.624 381.788 489.276 369.788 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://www.eu.apachecon.com/) @@ -1205,10 +1404,10 @@ endobj /H /I >> endobj -134 0 obj +153 0 obj << /Type /Annot /Subtype /Link -/Rect [ 365.316 188.882 536.616 176.882 ] +/Rect [ 365.316 317.335 536.616 305.335 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://people.apache.org/builds/lucene/solr/nightly/) @@ -1216,10 +1415,10 @@ endobj /H /I >> endobj -135 0 obj +154 0 obj << /Type /Annot /Subtype /Link -/Rect [ 90.0 175.682 114.672 163.682 ] +/Rect [ 90.0 304.135 114.672 292.135 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (http://people.apache.org/builds/lucene/solr/nightly/) @@ 
-1227,10 +1426,10 @@ endobj /H /I >> endobj -136 0 obj +155 0 obj << /Type /Annot /Subtype /Link -/Rect [ 283.968 162.482 321.288 150.482 ] +/Rect [ 283.968 290.935 321.288 278.935 ] /C [ 0 0 0 ] /Border [ 0 0 0 ] /A << /URI (api/index.html) @@ -1238,217 +1437,210 @@ endobj /H /I >> endobj -137 0 obj -<< /Length 695 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -GarnUbAQ)X']&X:=Jet`!/&,D2,P]8<(1Y6WJe1mlsX^0#@-U5.t:q(R"DAt/DD.NkFTT)g_S"[Oo.d)W;ZUTj9QrkAXWQr2RT_KLD`a9;kQgu-F`;g"@Xq>$Dp&,Gks4\$NOE@/eZAL>cab!D`SiECeFr)"J1Ab>OeG*"u/]J0.iA_udXTf4Aa3:5Ba>I<5>,"FY)W^rJFsTG^R]@Peim\ZR4sCgF-M2P=D&X?!/X;4Ub,S@q&FZF>?iq#H,>G6f;EsH[MM[?4(IpVFg0EWoIHd7bq5kj%Fks\VQ/rn%B;nY6l6K*sHMO'iOWS,12MY3l48.CXTr4'E/9RuuXa7J]E3gcS5T6OAjT;;>!Sg`o-M\;"GL]afAJX6?53EeDMu3gHisZYV($DY+,7Z_IK[j>2HPjEm$cl%1E<~> -endstream -endobj -138 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 612 792 ] -/Resources 3 0 R -/Contents 137 0 R ->> -endobj -140 0 obj -<< - /Title (\376\377\0\61\0\40\0\127\0\150\0\141\0\164\0\40\0\111\0\163\0\40\0\123\0\157\0\154\0\162\0\77) - /Parent 139 0 R - /Next 141 0 R - /A 9 0 R ->> endobj -141 0 obj -<< - /Title (\376\377\0\62\0\40\0\107\0\145\0\164\0\40\0\123\0\164\0\141\0\162\0\164\0\145\0\144) - /Parent 139 0 R - /Prev 140 0 R - /Next 142 0 R - /A 11 0 R ->> endobj -142 0 obj -<< - /Title (\376\377\0\63\0\40\0\116\0\145\0\167\0\163) - /Parent 139 0 R - /First 143 0 R - /Last 160 0 R - /Prev 141 0 R - /Count -18 - /A 13 0 R ->> endobj -143 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\40\0\62\0\65\0\40\0\112\0\165\0\156\0\145\0\40\0\62\0\60\0\61\0\60\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\64\0\56\0\61\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\144) - /Parent 142 0 R - /Next 144 0 R - /A 15 0 R ->> endobj -144 0 obj -<< - /Title (\376\377\0\63\0\56\0\62\0\40\0\67\0\40\0\115\0\141\0\171\0\40\0\62\0\60\0\61\0\60\0\40\0\55\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\105\0\165\0\162\0\157\0\143\0\157\0\156\0\40\0\62\0\60\0\61\0\60\0\40\0\103\0\157\0\155\0\151\0\156\0\147\0\40\0\164\0\157\0\40\0\120\0\162\0\141\0\147\0\165\0\145\0\40\0\115\0\141\0\171\0\40\0\61\0\70\0\55\0\62\0\61) - /Parent 142 0 R - /Prev 143 0 R - /Next 145 0 R - /A 17 0 R ->> endobj -145 0 obj -<< - /Title (\376\377\0\63\0\56\0\63\0\40\0\61\0\60\0\40\0\116\0\157\0\166\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\64\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\144) - /Parent 142 0 R - /Prev 144 0 R - /Next 146 0 R - /A 19 0 R ->> endobj -146 0 obj -<< - /Title (\376\377\0\63\0\56\0\64\0\40\0\62\0\60\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\47\0\163\0\40\0\146\0\151\0\162\0\163\0\164\0\40\0\142\0\157\0\157\0\153\0\40\0\151\0\163\0\40\0\160\0\165\0\142\0\154\0\151\0\163\0\150\0\145\0\144\0\41) - /Parent 142 0 R - /Prev 145 0 R - /Next 147 0 R - /A 21 0 R ->> endobj -147 0 obj -<< - /Title (\376\377\0\63\0\56\0\65\0\40\0\61\0\70\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\125\0\123\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156) - /Parent 142 0 R - /Prev 146 0 R - /Next 148 0 R - /A 23 0 R ->> endobj -148 0 obj -<< - /Title 
(\376\377\0\63\0\56\0\66\0\40\0\60\0\71\0\40\0\106\0\145\0\142\0\162\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\105\0\165\0\162\0\157\0\160\0\145\0\40\0\62\0\60\0\60\0\71\0\40\0\151\0\156\0\40\0\101\0\155\0\163\0\164\0\145\0\162\0\144\0\141\0\155) - /Parent 142 0 R - /Prev 147 0 R - /Next 149 0 R - /A 25 0 R ->> endobj -149 0 obj -<< - /Title (\376\377\0\63\0\56\0\67\0\40\0\61\0\71\0\40\0\104\0\145\0\143\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\114\0\157\0\147\0\157\0\40\0\103\0\157\0\156\0\164\0\145\0\163\0\164\0\40\0\122\0\145\0\163\0\165\0\154\0\164\0\163) - /Parent 142 0 R - /Prev 148 0 R - /Next 150 0 R - /A 27 0 R ->> endobj -150 0 obj -<< - /Title (\376\377\0\63\0\56\0\70\0\40\0\60\0\63\0\40\0\117\0\143\0\164\0\157\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\114\0\157\0\147\0\157\0\40\0\103\0\157\0\156\0\164\0\145\0\163\0\164) - /Parent 142 0 R - /Prev 149 0 R - /Next 151 0 R - /A 29 0 R ->> endobj -151 0 obj -<< - /Title (\376\377\0\63\0\56\0\71\0\40\0\61\0\65\0\40\0\123\0\145\0\160\0\164\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\63\0\56\0\60\0\40\0\101\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) - /Parent 142 0 R - /Prev 150 0 R - /Next 152 0 R - /A 31 0 R ->> endobj -152 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\60\0\40\0\62\0\70\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\57\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\116\0\145\0\167\0\40\0\117\0\162\0\154\0\145\0\141\0\156\0\163) - /Parent 142 0 R - /Prev 151 0 R - /Next 153 0 R - /A 33 0 R ->> endobj -153 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\61\0\40\0\60\0\63\0\40\0\123\0\145\0\160\0\164\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\67\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\101\0\164\0\154\0\141\0\156\0\164\0\141) - /Parent 142 0 R - /Prev 152 0 R - /Next 154 0 R - /A 35 0 R ->> endobj -154 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\62\0\40\0\60\0\66\0\40\0\112\0\165\0\156\0\145\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\61\0\56\0\62\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) - /Parent 142 0 R - /Prev 153 0 R - /Next 155 0 R - /A 37 0 R ->> endobj -155 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\63\0\40\0\61\0\67\0\40\0\112\0\141\0\156\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\147\0\162\0\141\0\144\0\165\0\141\0\164\0\145\0\163\0\40\0\146\0\162\0\157\0\155\0\40\0\111\0\156\0\143\0\165\0\142\0\141\0\164\0\157\0\162) - /Parent 142 0 R - /Prev 154 0 R - /Next 156 0 R - /A 39 0 R ->> endobj -156 0 obj -<< - /Title (\376\377\0\63\0\56\0\61\0\64\0\40\0\62\0\62\0\40\0\104\0\145\0\143\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\61\0\56\0\61\0\56\0\60\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) - /Parent 142 0 R - /Prev 155 0 R - /Next 157 0 R - /A 41 0 R ->> endobj 157 0 obj << - /Title 
(\376\377\0\63\0\56\0\61\0\65\0\40\0\61\0\65\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\125\0\123) - /Parent 142 0 R - /Prev 156 0 R + /Title (\376\377\0\61\0\40\0\127\0\150\0\141\0\164\0\40\0\111\0\163\0\40\0\123\0\157\0\154\0\162\0\77) + /Parent 156 0 R /Next 158 0 R - /A 43 0 R + /A 9 0 R >> endobj 158 0 obj << - /Title (\376\377\0\63\0\56\0\61\0\66\0\40\0\62\0\61\0\40\0\101\0\160\0\162\0\151\0\154\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156) - /Parent 142 0 R + /Title (\376\377\0\62\0\40\0\107\0\145\0\164\0\40\0\123\0\164\0\141\0\162\0\164\0\145\0\144) + /Parent 156 0 R /Prev 157 0 R /Next 159 0 R - /A 45 0 R + /A 11 0 R >> endobj 159 0 obj << - /Title (\376\377\0\63\0\56\0\61\0\67\0\40\0\62\0\61\0\40\0\106\0\145\0\142\0\162\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\156\0\151\0\147\0\150\0\164\0\154\0\171\0\40\0\142\0\165\0\151\0\154\0\144\0\163) - /Parent 142 0 R + /Title (\376\377\0\63\0\40\0\116\0\145\0\167\0\163) + /Parent 156 0 R + /First 160 0 R + /Last 178 0 R /Prev 158 0 R - /Next 160 0 R - /A 47 0 R + /Count -19 + /A 13 0 R >> endobj 160 0 obj << - /Title (\376\377\0\63\0\56\0\61\0\70\0\40\0\61\0\67\0\40\0\112\0\141\0\156\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\112\0\157\0\151\0\156\0\163\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\40\0\111\0\156\0\143\0\165\0\142\0\141\0\164\0\157\0\162) - /Parent 142 0 R - /Prev 159 0 R - /A 49 0 R + /Title (\376\377\0\63\0\56\0\61\0\40\0\115\0\141\0\162\0\143\0\150\0\40\0\62\0\60\0\61\0\61\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\63\0\56\0\61\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\144) + /Parent 159 0 R + /Next 161 0 R + /A 15 0 R >> endobj 161 0 obj +<< + /Title (\376\377\0\63\0\56\0\62\0\40\0\62\0\65\0\40\0\112\0\165\0\156\0\145\0\40\0\62\0\60\0\61\0\60\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\64\0\56\0\61\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\144) + /Parent 159 0 R + /Prev 160 0 R + /Next 162 0 R + /A 17 0 R +>> endobj +162 0 obj +<< + /Title (\376\377\0\63\0\56\0\63\0\40\0\67\0\40\0\115\0\141\0\171\0\40\0\62\0\60\0\61\0\60\0\40\0\55\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\105\0\165\0\162\0\157\0\143\0\157\0\156\0\40\0\62\0\60\0\61\0\60\0\40\0\103\0\157\0\155\0\151\0\156\0\147\0\40\0\164\0\157\0\40\0\120\0\162\0\141\0\147\0\165\0\145\0\40\0\115\0\141\0\171\0\40\0\61\0\70\0\55\0\62\0\61) + /Parent 159 0 R + /Prev 161 0 R + /Next 163 0 R + /A 19 0 R +>> endobj +163 0 obj +<< + /Title (\376\377\0\63\0\56\0\64\0\40\0\61\0\60\0\40\0\116\0\157\0\166\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\64\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\144) + /Parent 159 0 R + /Prev 162 0 R + /Next 164 0 R + /A 21 0 R +>> endobj +164 0 obj +<< + /Title (\376\377\0\63\0\56\0\65\0\40\0\62\0\60\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\47\0\163\0\40\0\146\0\151\0\162\0\163\0\164\0\40\0\142\0\157\0\157\0\153\0\40\0\151\0\163\0\40\0\160\0\165\0\142\0\154\0\151\0\163\0\150\0\145\0\144\0\41) + /Parent 159 0 R + /Prev 163 0 R + /Next 165 0 R + /A 23 0 R +>> endobj +165 0 obj +<< + /Title 
(\376\377\0\63\0\56\0\66\0\40\0\61\0\70\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\125\0\123\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156) + /Parent 159 0 R + /Prev 164 0 R + /Next 166 0 R + /A 25 0 R +>> endobj +166 0 obj +<< + /Title (\376\377\0\63\0\56\0\67\0\40\0\60\0\71\0\40\0\106\0\145\0\142\0\162\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\71\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\105\0\165\0\162\0\157\0\160\0\145\0\40\0\62\0\60\0\60\0\71\0\40\0\151\0\156\0\40\0\101\0\155\0\163\0\164\0\145\0\162\0\144\0\141\0\155) + /Parent 159 0 R + /Prev 165 0 R + /Next 167 0 R + /A 27 0 R +>> endobj +167 0 obj +<< + /Title (\376\377\0\63\0\56\0\70\0\40\0\61\0\71\0\40\0\104\0\145\0\143\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\114\0\157\0\147\0\157\0\40\0\103\0\157\0\156\0\164\0\145\0\163\0\164\0\40\0\122\0\145\0\163\0\165\0\154\0\164\0\163) + /Parent 159 0 R + /Prev 166 0 R + /Next 168 0 R + /A 29 0 R +>> endobj +168 0 obj +<< + /Title (\376\377\0\63\0\56\0\71\0\40\0\60\0\63\0\40\0\117\0\143\0\164\0\157\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\114\0\157\0\147\0\157\0\40\0\103\0\157\0\156\0\164\0\145\0\163\0\164) + /Parent 159 0 R + /Prev 167 0 R + /Next 169 0 R + /A 31 0 R +>> endobj +169 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\60\0\40\0\61\0\65\0\40\0\123\0\145\0\160\0\164\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\123\0\157\0\154\0\162\0\40\0\61\0\56\0\63\0\56\0\60\0\40\0\101\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) + /Parent 159 0 R + /Prev 168 0 R + /Next 170 0 R + /A 33 0 R +>> endobj +170 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\61\0\40\0\62\0\70\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\70\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\57\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\116\0\145\0\167\0\40\0\117\0\162\0\154\0\145\0\141\0\156\0\163) + /Parent 159 0 R + /Prev 169 0 R + /Next 171 0 R + /A 35 0 R +>> endobj +171 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\62\0\40\0\60\0\63\0\40\0\123\0\145\0\160\0\164\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\67\0\40\0\55\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\101\0\164\0\154\0\141\0\156\0\164\0\141) + /Parent 159 0 R + /Prev 170 0 R + /Next 172 0 R + /A 37 0 R +>> endobj +172 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\63\0\40\0\60\0\66\0\40\0\112\0\165\0\156\0\145\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\61\0\56\0\62\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) + /Parent 159 0 R + /Prev 171 0 R + /Next 173 0 R + /A 39 0 R +>> endobj +173 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\64\0\40\0\61\0\67\0\40\0\112\0\141\0\156\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\147\0\162\0\141\0\144\0\165\0\141\0\164\0\145\0\163\0\40\0\146\0\162\0\157\0\155\0\40\0\111\0\156\0\143\0\165\0\142\0\141\0\164\0\157\0\162) + /Parent 159 0 R + /Prev 172 0 R + /Next 174 0 R + /A 41 0 R +>> endobj +174 0 obj +<< + /Title 
(\376\377\0\63\0\56\0\61\0\65\0\40\0\62\0\62\0\40\0\104\0\145\0\143\0\145\0\155\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\61\0\56\0\61\0\56\0\60\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145) + /Parent 159 0 R + /Prev 173 0 R + /Next 175 0 R + /A 43 0 R +>> endobj +175 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\66\0\40\0\61\0\65\0\40\0\101\0\165\0\147\0\165\0\163\0\164\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156\0\40\0\125\0\123) + /Parent 159 0 R + /Prev 174 0 R + /Next 176 0 R + /A 45 0 R +>> endobj +176 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\67\0\40\0\62\0\61\0\40\0\101\0\160\0\162\0\151\0\154\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\141\0\164\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\103\0\157\0\156) + /Parent 159 0 R + /Prev 175 0 R + /Next 177 0 R + /A 47 0 R +>> endobj +177 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\70\0\40\0\62\0\61\0\40\0\106\0\145\0\142\0\162\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\156\0\151\0\147\0\150\0\164\0\154\0\171\0\40\0\142\0\165\0\151\0\154\0\144\0\163) + /Parent 159 0 R + /Prev 176 0 R + /Next 178 0 R + /A 49 0 R +>> endobj +178 0 obj +<< + /Title (\376\377\0\63\0\56\0\61\0\71\0\40\0\61\0\67\0\40\0\112\0\141\0\156\0\165\0\141\0\162\0\171\0\40\0\62\0\60\0\60\0\66\0\72\0\40\0\123\0\157\0\154\0\162\0\40\0\112\0\157\0\151\0\156\0\163\0\40\0\101\0\160\0\141\0\143\0\150\0\145\0\40\0\111\0\156\0\143\0\165\0\142\0\141\0\164\0\157\0\162) + /Parent 159 0 R + /Prev 177 0 R + /A 51 0 R +>> endobj +179 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding >> endobj -162 0 obj +180 0 obj << /Type /Font /Subtype /Type1 /Name /F5 /BaseFont /Times-Roman /Encoding /WinAnsiEncoding >> endobj -163 0 obj +181 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj -164 0 obj +182 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding >> endobj -165 0 obj +183 0 obj << /Type /Font /Subtype /Type1 /Name /F7 @@ -1458,328 +1650,352 @@ endobj 1 0 obj << /Type /Pages /Count 8 -/Kids [6 0 R 51 0 R 65 0 R 79 0 R 94 0 R 112 0 R 126 0 R 138 0 R ] >> +/Kids [6 0 R 53 0 R 69 0 R 90 0 R 98 0 R 117 0 R 134 0 R 147 0 R ] >> endobj 2 0 obj << /Type /Catalog /Pages 1 0 R - /Outlines 139 0 R + /Outlines 156 0 R /PageMode /UseOutlines >> endobj 3 0 obj << -/Font << /F3 161 0 R /F5 162 0 R /F1 163 0 R /F2 164 0 R /F7 165 0 R >> -/ProcSet [ /PDF /ImageC /Text ] /XObject <> +/ProcSet [ /PDF /ImageC /Text ] /XObject <> >> endobj 9 0 obj << /S /GoTo -/D [51 0 R /XYZ 85.0 659.0 null] +/D [53 0 R /XYZ 85.0 659.0 null] >> endobj 11 0 obj << /S /GoTo -/D [51 0 R /XYZ 85.0 411.066 null] +/D [53 0 R /XYZ 85.0 411.066 null] >> endobj 13 0 obj << /S /GoTo -/D [51 0 R /XYZ 85.0 307.932 null] +/D [53 0 R /XYZ 85.0 307.932 null] >> endobj 15 0 obj << /S /GoTo -/D [51 0 R /XYZ 85.0 276.798 null] +/D [53 0 R /XYZ 85.0 276.798 null] >> endobj 17 0 obj << /S /GoTo -/D [51 0 R /XYZ 85.0 177.945 null] +/D [69 0 R /XYZ 85.0 486.6 null] >> endobj 19 0 obj << /S /GoTo -/D [65 0 R /XYZ 85.0 421.8 null] +/D [69 0 R /XYZ 85.0 387.747 null] >> endobj 21 0 obj << /S /GoTo -/D [65 0 R /XYZ 85.0 184.947 null] +/D [90 0 R /XYZ 85.0 639.8 null] >> endobj 23 0 obj << /S /GoTo -/D [79 0 R /XYZ 85.0 402.6 null] +/D [90 0 R /XYZ 85.0 402.947 null] >> endobj 25 0 obj << 
/S /GoTo -/D [94 0 R /XYZ 85.0 573.8 null] +/D [98 0 R /XYZ 85.0 611.4 null] >> endobj 27 0 obj << /S /GoTo -/D [94 0 R /XYZ 85.0 243.947 null] +/D [98 0 R /XYZ 85.0 254.147 null] >> endobj 29 0 obj << /S /GoTo -/D [94 0 R /XYZ 85.0 179.494 null] +/D [117 0 R /XYZ 85.0 363.2 null] >> endobj 31 0 obj << /S /GoTo -/D [112 0 R /XYZ 85.0 624.6 null] +/D [117 0 R /XYZ 85.0 298.747 null] >> endobj 33 0 obj << /S /GoTo -/D [112 0 R /XYZ 85.0 525.747 null] +/D [117 0 R /XYZ 85.0 221.094 null] >> endobj 35 0 obj << /S /GoTo -/D [112 0 R /XYZ 85.0 288.294 null] +/D [134 0 R /XYZ 85.0 659.0 null] >> endobj 37 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 600.2 null] +/D [134 0 R /XYZ 85.0 421.547 null] >> endobj 39 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 488.147 null] +/D [134 0 R /XYZ 85.0 197.894 null] >> endobj 41 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 436.894 null] +/D [147 0 R /XYZ 85.0 616.6 null] >> endobj 43 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 359.241 null] +/D [147 0 R /XYZ 85.0 565.347 null] >> endobj 45 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 294.788 null] +/D [147 0 R /XYZ 85.0 487.694 null] >> endobj 47 0 obj << /S /GoTo -/D [126 0 R /XYZ 85.0 217.135 null] +/D [147 0 R /XYZ 85.0 423.241 null] >> endobj 49 0 obj << /S /GoTo -/D [138 0 R /XYZ 85.0 659.0 null] +/D [147 0 R /XYZ 85.0 345.588 null] >> endobj -139 0 obj +51 0 obj << - /First 140 0 R - /Last 142 0 R +/S /GoTo +/D [147 0 R /XYZ 85.0 267.935 null] +>> +endobj +156 0 obj +<< + /First 157 0 R + /Last 159 0 R >> endobj xref -0 166 +0 184 0000000000 65535 f -0000068530 00000 n -0000068640 00000 n -0000068733 00000 n +0000074160 00000 n +0000074270 00000 n +0000074363 00000 n 0000000015 00000 n 0000000071 00000 n -0000001561 00000 n -0000001681 00000 n -0000001846 00000 n -0000068903 00000 n -0000001981 00000 n -0000068966 00000 n -0000002118 00000 n -0000069032 00000 n -0000002255 00000 n -0000069098 00000 n -0000002392 00000 n -0000069164 00000 n -0000002528 00000 n -0000069230 00000 n -0000002665 00000 n -0000069294 00000 n -0000002802 00000 n -0000069360 00000 n -0000002939 00000 n -0000069424 00000 n -0000003076 00000 n -0000069488 00000 n -0000003213 00000 n -0000069554 00000 n -0000003350 00000 n -0000069620 00000 n -0000003486 00000 n -0000069685 00000 n -0000003623 00000 n -0000069752 00000 n -0000003759 00000 n -0000069819 00000 n -0000003896 00000 n -0000069884 00000 n -0000004033 00000 n -0000069951 00000 n -0000004170 00000 n -0000070018 00000 n -0000004306 00000 n -0000070085 00000 n -0000004443 00000 n -0000070152 00000 n -0000004580 00000 n -0000070219 00000 n -0000004717 00000 n -0000007583 00000 n -0000007706 00000 n -0000007803 00000 n -0000007981 00000 n -0000008165 00000 n -0000008332 00000 n -0000008523 00000 n -0000008726 00000 n -0000008892 00000 n -0000009083 00000 n -0000009254 00000 n -0000009449 00000 n -0000009652 00000 n -0000009880 00000 n -0000013826 00000 n -0000013949 00000 n -0000014046 00000 n -0000014222 00000 n -0000014448 00000 n -0000014635 00000 n -0000014829 00000 n -0000015014 00000 n -0000015199 00000 n -0000015390 00000 n -0000015593 00000 n -0000015821 00000 n -0000016141 00000 n -0000016458 00000 n -0000020294 00000 n -0000020417 00000 n -0000020514 00000 n -0000020704 00000 n -0000020911 00000 n -0000021116 00000 n -0000021319 00000 n -0000021522 00000 n -0000021724 00000 n -0000021927 00000 n -0000022130 00000 n -0000022333 00000 n -0000022536 00000 n -0000022737 00000 n -0000026703 00000 n -0000037179 00000 n -0000037302 00000 n -0000037430 00000 n -0000037629 00000 n -0000037828 
00000 n -0000038026 00000 n -0000038225 00000 n -0000038415 00000 n -0000038604 00000 n -0000038804 00000 n -0000039004 00000 n -0000039204 00000 n -0000039404 00000 n -0000039604 00000 n -0000039804 00000 n -0000040002 00000 n -0000040202 00000 n -0000043303 00000 n -0000052580 00000 n -0000052706 00000 n -0000052815 00000 n -0000053005 00000 n -0000053234 00000 n -0000053438 00000 n -0000053624 00000 n -0000053813 00000 n -0000054012 00000 n -0000054211 00000 n -0000054420 00000 n -0000054623 00000 n -0000054825 00000 n -0000055028 00000 n -0000058045 00000 n -0000058171 00000 n -0000058264 00000 n -0000058462 00000 n -0000058660 00000 n -0000058889 00000 n -0000059100 00000 n -0000059283 00000 n -0000059466 00000 n -0000059673 00000 n -0000059877 00000 n -0000060046 00000 n -0000060834 00000 n -0000070284 00000 n -0000060944 00000 n -0000061113 00000 n -0000061288 00000 n -0000061450 00000 n -0000061741 00000 n -0000062230 00000 n -0000062550 00000 n -0000062941 00000 n -0000063281 00000 n -0000063758 00000 n -0000064128 00000 n -0000064445 00000 n -0000064787 00000 n -0000065214 00000 n -0000065607 00000 n -0000065927 00000 n -0000066315 00000 n -0000066669 00000 n -0000066997 00000 n -0000067302 00000 n -0000067608 00000 n -0000067969 00000 n -0000068083 00000 n -0000068194 00000 n -0000068303 00000 n -0000068420 00000 n +0000001604 00000 n +0000001724 00000 n +0000001896 00000 n +0000074534 00000 n +0000002031 00000 n +0000074597 00000 n +0000002168 00000 n +0000074663 00000 n +0000002305 00000 n +0000074729 00000 n +0000002441 00000 n +0000074795 00000 n +0000002578 00000 n +0000074859 00000 n +0000002714 00000 n +0000074925 00000 n +0000002851 00000 n +0000074989 00000 n +0000002988 00000 n +0000075055 00000 n +0000003125 00000 n +0000075119 00000 n +0000003262 00000 n +0000075185 00000 n +0000003399 00000 n +0000075250 00000 n +0000003536 00000 n +0000075317 00000 n +0000003672 00000 n +0000075384 00000 n +0000003809 00000 n +0000075449 00000 n +0000003945 00000 n +0000075516 00000 n +0000004082 00000 n +0000075583 00000 n +0000004219 00000 n +0000075648 00000 n +0000004356 00000 n +0000075715 00000 n +0000004492 00000 n +0000075782 00000 n +0000004629 00000 n +0000075849 00000 n +0000004766 00000 n +0000075916 00000 n +0000004903 00000 n +0000008038 00000 n +0000008161 00000 n +0000008272 00000 n +0000008450 00000 n +0000008634 00000 n +0000008801 00000 n +0000008992 00000 n +0000009195 00000 n +0000009361 00000 n +0000009552 00000 n +0000009723 00000 n +0000009918 00000 n +0000010120 00000 n +0000010336 00000 n +0000010531 00000 n +0000010717 00000 n +0000014956 00000 n +0000015079 00000 n +0000015225 00000 n +0000015424 00000 n +0000015620 00000 n +0000015812 00000 n +0000015997 00000 n +0000016218 00000 n +0000016404 00000 n +0000016596 00000 n +0000016780 00000 n +0000016975 00000 n +0000017205 00000 n +0000017408 00000 n +0000017636 00000 n +0000017816 00000 n +0000018046 00000 n +0000018237 00000 n +0000018435 00000 n +0000018624 00000 n +0000018813 00000 n +0000021939 00000 n +0000022062 00000 n +0000022117 00000 n +0000022308 00000 n +0000022511 00000 n +0000022739 00000 n +0000023059 00000 n +0000023376 00000 n +0000027305 00000 n +0000027428 00000 n +0000027568 00000 n +0000027759 00000 n +0000027967 00000 n +0000028173 00000 n +0000028377 00000 n +0000028581 00000 n +0000028784 00000 n +0000028988 00000 n +0000029192 00000 n +0000029396 00000 n +0000029600 00000 n +0000029802 00000 n +0000030006 00000 n +0000030210 00000 n +0000030413 00000 n +0000030617 00000 n 
+0000034193 00000 n +0000044670 00000 n +0000044796 00000 n +0000044921 00000 n +0000045107 00000 n +0000045292 00000 n +0000045488 00000 n +0000045684 00000 n +0000045880 00000 n +0000046076 00000 n +0000046272 00000 n +0000046468 00000 n +0000046662 00000 n +0000046858 00000 n +0000047052 00000 n +0000047281 00000 n +0000047485 00000 n +0000050709 00000 n +0000059986 00000 n +0000060112 00000 n +0000060213 00000 n +0000060399 00000 n +0000060588 00000 n +0000060787 00000 n +0000060986 00000 n +0000061195 00000 n +0000061398 00000 n +0000061600 00000 n +0000061803 00000 n +0000062005 00000 n +0000062207 00000 n +0000064697 00000 n +0000064823 00000 n +0000064900 00000 n +0000065125 00000 n +0000065336 00000 n +0000065519 00000 n +0000065702 00000 n +0000065909 00000 n +0000066113 00000 n +0000075983 00000 n +0000066282 00000 n +0000066451 00000 n +0000066626 00000 n +0000066788 00000 n +0000067060 00000 n +0000067366 00000 n +0000067855 00000 n +0000068175 00000 n +0000068566 00000 n +0000068906 00000 n +0000069383 00000 n +0000069753 00000 n +0000070070 00000 n +0000070417 00000 n +0000070844 00000 n +0000071237 00000 n +0000071557 00000 n +0000071945 00000 n +0000072299 00000 n +0000072627 00000 n +0000072932 00000 n +0000073238 00000 n +0000073599 00000 n +0000073713 00000 n +0000073824 00000 n +0000073933 00000 n +0000074050 00000 n trailer << -/Size 166 +/Size 184 /Root 2 0 R /Info 4 0 R >> startxref -70338 +76037 %%EOF diff --git a/solr/site/issue_tracking.html b/solr/site/issue_tracking.html index 51742ebc16c..afa8d89c7b5 100755 --- a/solr/site/issue_tracking.html +++ b/solr/site/issue_tracking.html @@ -130,6 +130,9 @@ document.write("Last Published: " + document.lastModified);
