diff --git a/build.xml b/build.xml
index 76258509ebc..05d3df35af1 100644
@@ -38,6 +38,18 @@
+ [12 lines added]
diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath
index eb7e0ee8094..9f2c3286d05 100644
@@ -1,6 +1,7 @@
+ …
@@ -73,6 +74,10 @@
+ …
@@ -88,8 +93,8 @@
- …
+ …
@@ -119,7 +124,7 @@
- …
+ …
@@ -151,6 +156,12 @@
+ …
diff --git a/dev-tools/eclipse/lucene-codestyle.xml b/dev-tools/eclipse/lucene-codestyle.xml
new file mode 100644
index 00000000000..11c8e40e5e0
@@ -0,0 +1,269 @@
+ [Eclipse code-style (formatter) profile for Lucene, 269 lines]
diff --git a/dev-tools/idea/.idea/ant.xml b/dev-tools/idea/.idea/ant.xml
index 4616818c80e..b5c457676a8 100644
@@ -31,5 +31,6 @@
+ …
diff --git a/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml b/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml
new file mode 100644
index 00000000000..b7458479239
@@ -0,0 +1,9 @@
+ [IntelliJ copyright profile for the Apache Software Foundation, 9 lines]
\ No newline at end of file
diff --git a/dev-tools/idea/.idea/copyright/profiles_settings.xml b/dev-tools/idea/.idea/copyright/profiles_settings.xml
new file mode 100644
index 00000000000..5551fbedba0
@@ -0,0 +1,27 @@
+ [IntelliJ copyright profile settings, 27 lines]
diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml
index d436d9d5f96..ecd7c276c37 100644
@@ -32,6 +32,7 @@
+ …
diff --git a/dev-tools/idea/.idea/vcs.xml b/dev-tools/idea/.idea/vcs.xml
index ebabb34f161..ad48dc971f9 100644
@@ -1,5 +1,15 @@
+ …
diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml
index 6124a8b180f..150a2334ca6 100644
@@ -1,6 +1,6 @@
- …
+ …
diff --git a/dev-tools/idea/lucene/contrib/ant/ant.iml b/dev-tools/idea/lucene/contrib/ant/ant.iml
index 3f14daf7e33..fbe88c99764 100644
@@ -6,6 +6,7 @@
+ …
diff --git a/dev-tools/idea/lucene/contrib/queryparser/queryparser.iml b/dev-tools/idea/lucene/contrib/queryparser/queryparser.iml
index 9ba52710867..f4a112719e5 100644
@@ -6,6 +6,7 @@
+ …
diff --git a/dev-tools/idea/lucene/lucene.iml b/dev-tools/idea/lucene/lucene.iml
index c6dd9276e7e..80dfc2d4361 100644
@@ -6,6 +6,7 @@
+ …
diff --git a/dev-tools/idea/modules/analysis/common/common.iml b/dev-tools/idea/modules/analysis/common/common.iml
index 28d4158822a..57888ab9140 100644
@@ -7,6 +7,7 @@
+ …
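The build.xml hunk above adds roughly a dozen new lines near line 38, but the element markup itself was not preserved, so only their size and position are recoverable. Since the rest of this commit introduces the POM templates under dev-tools/maven, a plausible shape for such an addition is sketched below; the target name get-maven-poms, the property names, and the filtering details are assumptions rather than recovered text.

<!-- Hypothetical sketch only: copies the dev-tools/maven POM templates into the
     source tree, substituting the @version@ token.  Target and property names
     are assumed; the stripped hunk only shows that new lines were added here. -->
<target name="get-maven-poms"
        description="Copy Maven POM templates from dev-tools/maven/ into the source tree">
  <copy todir="${basedir}" overwrite="true">
    <fileset dir="${basedir}/dev-tools/maven" includes="**/pom.xml.template"/>
    <filterset begintoken="@" endtoken="@">
      <filter token="version" value="${version}"/>
    </filterset>
    <!-- drop the .template suffix while copying -->
    <globmapper from="*.xml.template" to="*.xml"/>
  </copy>
</target>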
diff --git a/dev-tools/idea/modules/benchmark/benchmark.iml b/dev-tools/idea/modules/benchmark/benchmark.iml
index 95ca885a13c..fcd77da13e1 100644
@@ -7,7 +7,6 @@
- …
diff --git a/dev-tools/idea/solr/contrib/uima/uima.iml b/dev-tools/idea/solr/contrib/uima/uima.iml
new file mode 100644
index 00000000000..9eca88b7752
@@ -0,0 +1,31 @@
+ [IntelliJ module definition for the new solr/contrib/uima module, 31 lines]
diff --git a/dev-tools/idea/solr/solr.iml b/dev-tools/idea/solr/solr.iml
index b8de790a000..218fd039de9 100644
@@ -10,7 +10,6 @@
- …
diff --git a/dev-tools/maven/lucene/contrib/ant/pom.xml.template b/dev-tools/maven/lucene/contrib/ant/pom.xml.template
new file mode 100644
index 00000000000..da8a45fd38e
@@ -0,0 +1,115 @@
+ [POM template for lucene-ant, "Lucene Contrib Ant" (jar): "Ant task to create Lucene indexes"; parent lucene-parent via ../../pom.xml; module-directory lucene/contrib/ant, build directory ../../build/contrib/ant; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, jtidy, org.apache.ant:ant, org.apache.ant:ant-junit (test), junit (test); resources from src/resources; appassembler-maven-plugin program org.apache.lucene.ant.HtmlDocument -> HtmlDocument]
diff --git a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template
new file mode 100644
index 00000000000..3f172080f93
@@ -0,0 +1,76 @@
+ [POM template for lucene-bdb-je, "Lucene Contrib bdb-je" (jar): "Berkeley DB based Directory implementation"; parent lucene-parent via ../../../pom.xml; dependencies: lucene-core, lucene-test-framework (test-jar, test), com.sleepycat:berkeleydb-je, junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template
new file mode 100644
index 00000000000..6a7d4a05cdd
@@ -0,0 +1,87 @@
+ [POM template for lucene-bdb, "Lucene Contrib bdb" (jar): "Berkeley DB based Directory implementation"; parent lucene-parent via ../../../pom.xml; dependencies: lucene-core, lucene-test-framework (test-jar, test), com.sleepycat:berkeleydb, junit (test); maven-surefire-plugin has a boolean flag set true, presumably to skip these tests]
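All of the new contrib templates above follow one pattern that is still visible in the stripped text: they inherit from lucene-parent through a relativePath, carry the @version@ placeholder that the build substitutes, and declare the contrib's own dependencies next to the shared test dependencies. A sketch of the lucene-ant template under that assumption follows; the element names are standard Maven POM elements, the dependency list is abbreviated, and the original formatting is not preserved.

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-parent</artifactId>
    <version>@version@</version>
    <relativePath>../../pom.xml</relativePath>
  </parent>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-ant</artifactId>
  <packaging>jar</packaging>
  <name>Lucene Contrib Ant</name>
  <description>Ant task to create Lucene indexes</description>
  <properties>
    <module-directory>lucene/contrib/ant</module-directory>
    <build-directory>../../build/contrib/ant</build-directory>
  </properties>
  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>lucene-test-framework</artifactId>
      <version>${project.version}</version>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <!-- lucene-analyzers-common, jtidy, ant, ant-junit omitted here -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>

Third-party dependencies such as jtidy and junit carry no version here because versions are managed centrally by the grandparent POM introduced later in this patch.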
diff --git a/lucene/contrib/spatial/pom.xml.template b/dev-tools/maven/lucene/contrib/db/pom.xml.template
similarity index 75%
rename from lucene/contrib/spatial/pom.xml.template
rename to dev-tools/maven/lucene/contrib/db/pom.xml.template
index 9b81a3e609c..aa3b0daa77f 100644
- [old lucene-spatial template: lucene-contrib parent, "Lucene Spatial", "Spatial search package.", jar packaging, maven-compiler-plugin source/target 1.5]
+ [the template becomes lucene-db-aggregator, "Lucene Database aggregator POM" (pom packaging): parent lucene-parent via ../../pom.xml; modules bdb and bdb-je; build directory ../../build/contrib/db/lucene-db-aggregator; maven-deploy-plugin disabled for the aggregator]
diff --git a/dev-tools/maven/lucene/contrib/demo/pom.xml.template b/dev-tools/maven/lucene/contrib/demo/pom.xml.template
new file mode 100644
index 00000000000..0efced332d0
@@ -0,0 +1,124 @@
+ [POM template for lucene-demo, "Lucene Demos" (war): "This is the demo for Apache Lucene Java"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, javax.servlet:servlet-api (provided), junit (test); maven-war-plugin using src/jsp; appassembler-maven-plugin programs DeleteFiles, IndexFiles, IndexHTML, SearchFiles]
diff --git a/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template b/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template
new file mode 100644
index 00000000000..82594936bec
@@ -0,0 +1,84 @@
+ [POM template for lucene-highlighter, "Lucene Highlighter" (jar): "This is the highlighter for apache lucene java"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-memory, lucene-queries, junit (test)]
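The renamed db POM illustrates the aggregator pattern used throughout this patch: pom packaging, a modules list, and a maven-deploy-plugin override so the aggregator artifact itself is never deployed. A sketch of its core, assuming the bare "true" in the stripped text is the deploy plugin's skip flag:

  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-db-aggregator</artifactId>
  <name>Lucene Database aggregator POM</name>
  <packaging>pom</packaging>
  <modules>
    <module>bdb</module>
    <module>bdb-je</module>
  </modules>
  <build>
    <directory>../../build/contrib/db/lucene-db-aggregator</directory>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-deploy-plugin</artifactId>
        <configuration>
          <!-- the aggregator produces nothing deployable of its own -->
          <skip>true</skip>
        </configuration>
      </plugin>
    </plugins>
  </build>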
diff --git a/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template b/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template
new file mode 100644
index 00000000000..13f8cb7fc70
@@ -0,0 +1,72 @@
+ [POM template for lucene-instantiated, "Lucene InstantiatedIndex" (jar): "InstantiatedIndex, alternative RAM store for small corpora."; dependencies: lucene-core, lucene-test-framework (test-jar, test), junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/lucli/pom.xml.template b/dev-tools/maven/lucene/contrib/lucli/pom.xml.template
new file mode 100644
index 00000000000..391178ae430
@@ -0,0 +1,108 @@
+ [POM template for lucene-lucli, "Lucene Lucli" (jar): "Lucene Command Line Interface"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, jline, junit (test); maven-javadoc-plugin has a flag set true; appassembler-maven-plugin program lucli.Lucli -> lucli]
diff --git a/dev-tools/maven/lucene/contrib/memory/pom.xml.template b/dev-tools/maven/lucene/contrib/memory/pom.xml.template
new file mode 100644
index 00000000000..b82f713fba7
@@ -0,0 +1,74 @@
+ [POM template for lucene-memory, "Lucene Memory" (jar): "High-performance single-document index to compare against Query"; dependencies: lucene-core, lucene-test-framework (test-jar, test), junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/misc/pom.xml.template b/dev-tools/maven/lucene/contrib/misc/pom.xml.template
new file mode 100644
index 00000000000..d076f688ca5
@@ -0,0 +1,128 @@
+ [POM template for lucene-misc, "Lucene Miscellaneous" (jar): "Miscellaneous Lucene extensions"; dependencies: lucene-core, lucene-core (test-jar, test), lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test); appassembler-maven-plugin programs FieldNormModifier, IndexSplitter, MultiPassIndexSplitter, GetTermInfo, HighFreqTerms, IndexMergeTool, LengthNormModifier]
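Several of these templates (lucli, misc, demo, swing, wordnet, and later benchmark, lucene-core and solr-core) configure appassembler-maven-plugin the same way: -Xmx128M, a flat repository layout, windows and unix platforms, and a list of main classes to wrap in launcher scripts. A sketch of that recurring block, shown with the lucli program; the element names are the plugin's standard configuration keys and are assumed here because the markup itself was not preserved.

<plugin>
  <groupId>org.codehaus.mojo</groupId>
  <artifactId>appassembler-maven-plugin</artifactId>
  <configuration>
    <extraJvmArguments>-Xmx128M</extraJvmArguments>
    <repositoryLayout>flat</repositoryLayout>
    <platforms>
      <platform>windows</platform>
      <platform>unix</platform>
    </platforms>
    <programs>
      <!-- one <program> entry per command-line tool listed in the template -->
      <program>
        <mainClass>lucli.Lucli</mainClass>
        <name>lucli</name>
      </program>
    </programs>
  </configuration>
</plugin>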
diff --git a/dev-tools/maven/lucene/contrib/pom.xml.template b/dev-tools/maven/lucene/contrib/pom.xml.template
new file mode 100644
index 00000000000..f16895b084d
@@ -0,0 +1,62 @@
+ [POM template for lucene-contrib-aggregator, "Lucene Contrib aggregator POM" (pom packaging): parent lucene-parent via ../pom.xml; modules ant, db, demo, highlighter, instantiated, lucli, memory, misc, queries, queryparser, spatial, spellchecker, swing, wordnet, xml-query-parser; build directory build/lucene-contrib-aggregator; maven-deploy-plugin disabled]
diff --git a/dev-tools/maven/lucene/contrib/queries/pom.xml.template b/dev-tools/maven/lucene/contrib/queries/pom.xml.template
new file mode 100644
index 00000000000..3286526a3bf
@@ -0,0 +1,85 @@
+ [POM template for lucene-queries, "Lucene Queries" (jar): "Queries - various query object exotica not in core"; dependencies: lucene-core, lucene-core (test-jar, test), lucene-test-framework (test-jar, test), jakarta-regexp, junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/queryparser/pom.xml.template b/dev-tools/maven/lucene/contrib/queryparser/pom.xml.template
new file mode 100644
index 00000000000..ecbaf9e79c4
@@ -0,0 +1,86 @@
+ [POM template for lucene-queryparser, "Lucene Query Parser" (jar): "This is the Flexible Query Parser for apache lucene java"; dependencies: lucene-core, lucene-core (test-jar, test), lucene-test-framework (test-jar, test), junit (test); resources from src/resources]
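The misc, queries and queryparser templates above depend on lucene-core's test classes as well as on its main artifact; in Maven that is expressed as a second dependency on the same coordinates with type test-jar and test scope. A sketch of the pair as it presumably appears in these templates:

<dependency>
  <groupId>${project.groupId}</groupId>
  <artifactId>lucene-core</artifactId>
  <version>${project.version}</version>
</dependency>
<dependency>
  <!-- same artifact, but the attached test-jar, visible only to tests -->
  <groupId>${project.groupId}</groupId>
  <artifactId>lucene-core</artifactId>
  <version>${project.version}</version>
  <type>test-jar</type>
  <scope>test</scope>
</dependency>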
diff --git a/dev-tools/maven/lucene/contrib/spatial/pom.xml.template b/dev-tools/maven/lucene/contrib/spatial/pom.xml.template
new file mode 100644
index 00000000000..0db411f23c6
@@ -0,0 +1,77 @@
+ [POM template for lucene-spatial, "Lucene Spatial" (jar): "Spatial search package"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-queries, junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template b/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template
new file mode 100644
index 00000000000..845e196bf64
@@ -0,0 +1,77 @@
+ [POM template for lucene-spellchecker, "Lucene Spellchecker" (jar): "Spell Checker"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test)]
diff --git a/dev-tools/maven/lucene/contrib/swing/pom.xml.template b/dev-tools/maven/lucene/contrib/swing/pom.xml.template
new file mode 100644
index 00000000000..649e422c4e1
@@ -0,0 +1,101 @@
+ [POM template for lucene-swing, "Lucene Swing" (jar): "Swing Models"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test); appassembler-maven-plugin programs ListSearcherSimulator -> ListSearchSimulator and TableSearcherSimulator -> TableSearchSimulator]
diff --git a/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template b/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template
new file mode 100644
index 00000000000..ed72a977be7
@@ -0,0 +1,105 @@
+ [POM template for lucene-wordnet, "Lucene Wordnet" (jar): "WordNet"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test); appassembler-maven-plugin programs SynExpand (listed twice in the template) and Syns2Index]
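Each template also maps the Ant-era layout onto Maven's build model: per-module output directories under the shared build tree, src/java and src/test as source roots, src/resources as a resource root, and the test source tree itself registered as a test-resource root so data files stored next to the tests reach the test classpath. A sketch using the queryparser template's values; whether the **/*.java pattern in the stripped text is an include or an exclude cannot be recovered, and this sketch assumes exclude.

<build>
  <directory>${build-directory}</directory>
  <outputDirectory>${build-directory}/classes/java</outputDirectory>
  <testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
  <sourceDirectory>src/java</sourceDirectory>
  <testSourceDirectory>src/test</testSourceDirectory>
  <resources>
    <resource>
      <directory>src/resources</directory>
    </resource>
  </resources>
  <testResources>
    <testResource>
      <!-- copy non-Java files living next to the tests onto the classpath -->
      <directory>${project.build.testSourceDirectory}</directory>
      <excludes>
        <exclude>**/*.java</exclude>
      </excludes>
    </testResource>
  </testResources>
</build>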
diff --git a/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template b/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template
new file mode 100644
index 00000000000..7a543713bdd
@@ -0,0 +1,82 @@
+ [POM template for lucene-xml-query-parser, "Lucene XML Query Parser" (jar): "XML query parser"; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-queries, javax.servlet:servlet-api (provided), junit (test)]
diff --git a/lucene/contrib/db/bdb/pom.xml.template b/dev-tools/maven/lucene/pom.xml.template
similarity index 63%
rename from lucene/contrib/db/bdb/pom.xml.template
rename to dev-tools/maven/lucene/pom.xml.template
index 04696d96060..19fda15d708 100644
- [old contrib bdb template: lucene-contrib parent, maven-compiler-plugin source/target 1.5]
+ [the template becomes the lucene-core POM, "Lucene Core" (jar): "Apache Lucene Java Core"; parent lucene-parent via ../pom.xml; module-directory lucene, build directory ../build; sources java and test; dependencies: junit, org.apache.ant:ant, org.apache.ant:ant-junit (all test scope); maven-jar-plugin builds a test-jar; maven-surefire-plugin passes ${project.version} to the tests (presumably as the Lucene match version); appassembler-maven-plugin programs CheckIndex, IndexReader, LockStressTest, LockVerifyServer (its program name is given as IndexReader in the template), English; build-helper-maven-plugin registers test-framework as an extra test-source root]
diff --git a/dev-tools/maven/lucene/src/test-framework/pom.xml.template b/dev-tools/maven/lucene/src/test-framework/pom.xml.template
new file mode 100644
index 00000000000..e391e5f83ba
@@ -0,0 +1,89 @@
+ [POM template for lucene-test-framework, "Lucene Test Framework" (jar): "Apache Lucene Java Test Framework"; parent lucene-parent via ../../pom.xml; module-directory lucene/src/test-framework, build directory ../../build; dependencies: lucene-core (test), junit (test), org.apache.ant:ant-junit (test); output under ${build-directory}/classes/test-framework; maven-jar-plugin builds a test-jar; maven-surefire-plugin has a flag set true, presumably because this module ships no tests of its own]
diff --git a/dev-tools/maven/modules/analysis/common/pom.xml.template b/dev-tools/maven/modules/analysis/common/pom.xml.template
new file mode 100644
index 00000000000..19f8615459e
@@ -0,0 +1,123 @@
+ [POM template for lucene-analyzers-common, "Lucene Common Analyzers" (jar): "Additional Analyzers"; parent lucene-parent via ../../../lucene/pom.xml; module-directory modules/analysis/common, build directory ../build/common; dependencies: lucene-core, lucene-core (test-jar, test), lucene-test-framework (test-jar, test), junit (test); resources from src/resources; maven-jar-plugin builds a test-jar; appassembler-maven-plugin programs HtmlStripCharFilter, org.apache.lucene.analysis.en.PorterStemmer -> EnglishPorterStemmer, org.tartarus.snowball.TestApp -> SnowballTestApp]
diff --git a/dev-tools/maven/modules/analysis/icu/pom.xml.template b/dev-tools/maven/modules/analysis/icu/pom.xml.template
new file mode 100644
index 00000000000..363364c88b6
@@ -0,0 +1,103 @@
+ [POM template for lucene-analyzers-icu, "Lucene ICU Analysis Components" (jar): "Provides integration with ICU (International Components for Unicode) for stronger Unicode and internationalization support."; module-directory modules/analysis/icu, build directory ../build/icu; dependencies: lucene-core, lucene-core (test-jar, test), lucene-test-framework (test-jar, test), lucene-analyzers-common, lucene-analyzers-common (test-jar, test), com.ibm.icu:icu4j, junit (test); resources from src/resources]
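The lucene-core template keeps src/test-framework compiling alongside the core tests by registering it as an extra test-source root, and both lucene-core and lucene-test-framework attach their test classes as a test-jar for the modules that consume them. A sketch of those two plugin blocks, assuming the standard build-helper-maven-plugin and maven-jar-plugin configuration elements:

<plugin>
  <groupId>org.codehaus.mojo</groupId>
  <artifactId>build-helper-maven-plugin</artifactId>
  <executions>
    <execution>
      <id>add-test-source</id>
      <phase>generate-test-sources</phase>
      <goals>
        <goal>add-test-source</goal>
      </goals>
      <configuration>
        <sources>
          <!-- relative to the module's base directory -->
          <source>test-framework</source>
        </sources>
      </configuration>
    </execution>
  </executions>
</plugin>
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <executions>
    <execution>
      <goals>
        <!-- attach the compiled test classes as an extra artifact -->
        <goal>test-jar</goal>
      </goals>
    </execution>
  </executions>
</plugin>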
diff --git a/dev-tools/maven/modules/analysis/phonetic/pom.xml.template b/dev-tools/maven/modules/analysis/phonetic/pom.xml.template
new file mode 100644
index 00000000000..c3d4f4a4c06
@@ -0,0 +1,90 @@
+ [POM template for lucene-analyzers-phonetic, "Lucene Phonetic Filters" (jar): "Provides phonetic encoding via Commons Codec."; module-directory modules/analysis/phonetic, build directory ../build/phonetic; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, lucene-analyzers-common (test-jar, test), commons-codec, junit (test)]
diff --git a/lucene/lucene-demos-pom.xml.template b/dev-tools/maven/modules/analysis/pom.xml.template
similarity index 56%
rename from lucene/lucene-demos-pom.xml.template
rename to dev-tools/maven/modules/analysis/pom.xml.template
index 8f4d6321a4a..f7df04f98d6 100644
- [old lucene-demos POM: lucene-parent parent, "Lucene Demos", "This is the demo for Apache Lucene Java", jar packaging, dependency on lucene-core @version@]
+ [the template becomes lucene-analysis-modules-aggregator, "Lucene Analysis Modules aggregator POM" (pom packaging): parent lucene-parent via ../../lucene/pom.xml; modules common, icu, phonetic, smartcn, stempel; build directory build/lucene-analysis-modules-aggregator; maven-deploy-plugin disabled]
diff --git a/dev-tools/maven/modules/analysis/smartcn/pom.xml.template b/dev-tools/maven/modules/analysis/smartcn/pom.xml.template
new file mode 100644
index 00000000000..26a7e4f8a13
@@ -0,0 +1,82 @@
+ [POM template for lucene-analyzers-smartcn, "Lucene Smart Chinese Analyzer" (jar): "Smart Chinese Analyzer"; module-directory modules/analysis/smartcn, build directory ../build/smartcn; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test); resources from src/resources]
diff --git a/dev-tools/maven/modules/analysis/stempel/pom.xml.template b/dev-tools/maven/modules/analysis/stempel/pom.xml.template
new file mode 100644
index 00000000000..cacb74d7df9
@@ -0,0 +1,82 @@
+ [POM template for lucene-analyzers-stempel, "Lucene Stempel Analyzer" (jar): "Stempel Analyzer"; module-directory modules/analysis/stempel, build directory ../build/stempel; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, junit (test); resources from src/resources]
diff --git a/dev-tools/maven/modules/benchmark/pom.xml.template b/dev-tools/maven/modules/benchmark/pom.xml.template
new file mode 100755
index 00000000000..184c190434a
@@ -0,0 +1,157 @@
+ [POM template for lucene-benchmark, "Lucene Benchmark" (jar): "Lucene Benchmarking Module"; parent lucene-parent via ../../lucene/pom.xml; module-directory modules/benchmark; dependencies: lucene-core, lucene-test-framework (test-jar, test), lucene-analyzers-common, lucene-demo (classifier classes), lucene-highlighter, lucene-memory, commons-beanutils, commons-collections, commons-compress, commons-digester, commons-logging, xerces:xercesImpl, xml-apis, junit (test); appassembler-maven-plugin programs Benchmark, QueryDriver, QualityQueriesFinder, ExtractReuters, ExtractWikipedia]
diff --git a/lucene/contrib/instantiated/pom.xml.template b/dev-tools/maven/modules/pom.xml.template
similarity index 75%
rename from lucene/contrib/instantiated/pom.xml.template
rename to dev-tools/maven/modules/pom.xml.template
index b65d04946f0..5681866970b 100644
- [old contrib instantiated template: lucene-contrib parent, "Lucene InstantiatedIndex", maven-compiler-plugin source/target 1.5]
+ [the template becomes lucene-modules-aggregator, "Lucene Modules aggregator POM" (pom packaging): parent lucene-parent via ../lucene/pom.xml; modules analysis and benchmark; build directory build/lucene-modules-aggregator; maven-deploy-plugin disabled]
diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template
new file mode 100644
index 00000000000..9bf98955a61
@@ -0,0 +1,794 @@
+ [POM template for lucene-solr-grandparent, "Grandparent POM for Apache Lucene Java and Apache Solr" / "Parent POM for Apache Lucene Java and Apache Solr" (pom packaging), URL http://lucene.apache.org/java, parent org.apache:apache:8; modules lucene, modules, solr]
+ [properties: a base specification version of 4.0.0, timestamp format yyyy-MM-dd HH:mm:ss, Java compatibility 1.5, Jetty 6.1.26, slf4j 1.5.5, Tika 0.8, and a UIMA artifact version tied to ${project.version}]
+ [project metadata: JIRA issue management (LUCENE), Hudson CI, the General / Java User / Java Developer / Java Commits mailing lists, inception year 2000, Apache 2 license, svn scm URLs parameterized by ${module-directory}, and the carrot2.org and apache.snapshots repositories]
+ [a dependencyManagement section pinning third-party versions: icu4j 4.6, berkeleydb 4.7.25, berkeleydb-je 3.3.93, commons-beanutils 1.7.0, commons-codec 1.4, commons-collections 3.2.1, commons-digester 1.7, commons-fileupload 1.2.1, commons-io 1.4, commons-httpclient 3.1, commons-lang 2.4, commons-logging 1.1.1, jakarta-regexp 1.4, activation 1.1, mail 1.4.1, jline 0.9.1, jtidy 4aug2000r7-dev, junit 4.7, ant 1.7.1, ant-junit 1.7.1, commons-compress 1.1, geronimo-stax-api_1.0_spec 1.0.1, solr-commons-csv and solr-noggit at ${project.version}, tika-core and tika-parsers at ${tika.version}, uima-alchemy-annotator at ${uima.version}, ...]
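The grandparent template centralizes every third-party version in this dependencyManagement block, which is why the leaf templates above omit versions for junit, jtidy, commons-* and the rest. A sketch showing three of the entries whose coordinates and versions are recoverable from the text:

<dependencyManagement>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.7</version>
    </dependency>
    <dependency>
      <groupId>com.ibm.icu</groupId>
      <artifactId>icu4j</artifactId>
      <version>4.6</version>
    </dependency>
    <dependency>
      <groupId>com.sleepycat</groupId>
      <artifactId>berkeleydb-je</artifactId>
      <version>3.3.93</version>
    </dependency>
    <!-- further managed entries follow in the template -->
  </dependencies>
</dependencyManagement>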
+ [dependencyManagement continues: uima-OpenCalaisAnnotator, uima-Tagger, uima-WhitespaceTokenizer and uima-uimaj-core at ${uima.version}; velocity 1.6.4, velocity-tools 2.0, zookeeper 3.3.1, carrot2-core 3.4.2, wstx-asl 3.2.7, easymock 2.2, jetty and jetty-util at ${jetty.version}, jsp-2.1-glassfish 2.1.v20091210, jsp-2.1-jetty ${jetty.version}, jsp-api-2.1-glassfish 2.1.v20091210, jcl-over-slf4j / log4j-over-slf4j / slf4j-api / slf4j-jdk14 / slf4j-simple at ${slf4j.version}, xercesImpl 2.9.1-patched-XERCESJ-1257, xml-apis 2.9.0, servlet-api 2.4]
+ [build directory lucene/build/lucene-parent; pluginManagement pinning plugin versions: maven-antrun-plugin 1.6, maven-clean-plugin 2.4.1, maven-compiler-plugin 2.3.2 (source/target ${java.compat.version}), maven-deploy-plugin 2.5, maven-enforcer-plugin 1.0, maven-install-plugin 2.3.1, maven-jar-plugin 2.3.1 with manifest entries (specification/implementation title and vendor The Apache Software Foundation, specification version ${base.specification.version}.${now.version}, implementation version "${project.version} ${svn.revision} - ${user.name} - ${now.timestamp}", compile source/target ${java.compat.version}), maven-resources-plugin 2.4.3, maven-surefire-plugin 2.7.1 (plain report format, working directory ${project.build.testOutputDirectory}, a temp directory, and pass-throughs for tests.codec, tests.directory, tests.iter, tests.locale, tests.luceneMatchVersion, tests.multiplier, tests.nightly, tests.seed, tests.timezone), maven-war-plugin 2.1.1 (same manifest entries), appassembler-maven-plugin 1.1, build-helper-maven-plugin 1.5, buildnumber-maven-plugin 1.0-beta-4, maven-jetty-plugin ${jetty.version}, gmaven-plugin 1.3]
+ [build plugins: a gmaven-plugin execution generate-timestamps at the validate phase running a Groovy snippet that derives now.timestamp, now.version and now.year from ${maven.build.timestamp}; a buildnumber-maven-plugin create execution at validate exposing the svn revision as ${svn.revision}]
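The surefire entry in the pluginManagement block above forwards Lucene's test-randomization knobs from the Maven command line into the forked test JVM. The property names (tests.codec, tests.iter, tests.seed, ...) are recoverable from the text; the wrapper element names below are the plugin's standard ones for version 2.7.1 and are assumed.

<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-surefire-plugin</artifactId>
  <version>2.7.1</version>
  <configuration>
    <reportFormat>plain</reportFormat>
    <workingDirectory>${project.build.testOutputDirectory}</workingDirectory>
    <systemPropertyVariables>
      <!-- each -Dtests.* given on the mvn command line reaches the test JVM -->
      <tempDir>temp</tempDir>
      <tests.codec>${tests.codec}</tests.codec>
      <tests.directory>${tests.directory}</tests.directory>
      <tests.iter>${tests.iter}</tests.iter>
      <tests.locale>${tests.locale}</tests.locale>
      <tests.luceneMatchVersion>${tests.luceneMatchVersion}</tests.luceneMatchVersion>
      <tests.multiplier>${tests.multiplier}</tests.multiplier>
      <tests.nightly>${tests.nightly}</tests.nightly>
      <tests.seed>${tests.seed}</tests.seed>
      <tests.timezone>${tests.timezone}</tests.timezone>
    </systemPropertyVariables>
  </configuration>
</plugin>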
+ [a maven-enforcer-plugin execution enforce-java-compat-version-and-maven-2.2.1: requires Java ${java.compat.version}+ ("Java ${java.compat.version}+ is required.", version range [${java.compat.version},)) and Maven 2.2.1+ ("Maven 2.2.1+ is required.", version range [2.2.1,))]
+ [a bootstrap profile: a maven-antrun-plugin execution get-jars-and-poms bound to the install phase, plus maven-install-plugin install-file executions that install the jars checked into the source tree: icu4j 4.6 from modules/analysis/icu/lib/icu4j-4_6.jar, xercesImpl 2.9.1-patched-XERCESJ-1257 from modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar, xml-apis 2.9.0 from modules/benchmark/lib/xml-apis-2.9.0.jar, berkeleydb 4.7.25 from lucene/contrib/db/bdb/lib/db-4.7.25.jar, berkeleydb-je 3.3.93 from lucene/contrib/db/bdb-je/lib/je-3.3.93.jar, solr-commons-csv from solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar, solr-noggit from solr/lib/apache-solr-noggit-r944541.jar, uima-alchemy-annotator from solr/contrib/uima/lib/uima-an-alchemy.jar, uima-OpenCalaisAnnotator from solr/contrib/uima/lib/uima-an-calais.jar, uima-Tagger from solr/contrib/uima/lib/uima-an-tagger.jar, uima-WhitespaceTokenizer from solr/contrib/uima/lib/uima-an-wst.jar, uima-uimaj-core from solr/contrib/uima/lib/uima-core.jar]
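The bootstrap profile installs the jars that ship in the source tree's lib/ directories into the local repository, one maven-install-plugin install-file execution per jar. A sketch of the profile with the icu4j execution spelled out; the remaining executions differ only in coordinates and file path. Presumably the profile is activated with something like "mvn -N -Pbootstrap install" after the POM templates have been copied into place, though the exact invocation is not part of the recovered text.

<profile>
  <id>bootstrap</id>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-install-plugin</artifactId>
        <executions>
          <execution>
            <id>install-icu4j</id>
            <phase>install</phase>
            <goals>
              <goal>install-file</goal>
            </goals>
            <configuration>
              <!-- install the checked-in jar under these Maven coordinates -->
              <groupId>com.ibm.icu</groupId>
              <artifactId>icu4j</artifactId>
              <version>4.6</version>
              <packaging>jar</packaging>
              <file>modules/analysis/icu/lib/icu4j-4_6.jar</file>
            </configuration>
          </execution>
          <!-- further install-file executions: xercesImpl, xml-apis, berkeleydb,
               berkeleydb-je, solr-commons-csv, solr-noggit, and the uima-* jars -->
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>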
diff --git a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
new file mode 100644
index 00000000000..0ff4b13d8fe
@@ -0,0 +1,116 @@
+ [POM template for solr-analysis-extras, "Apache Solr Analysis Extras" (jar): parent solr-parent via ../../pom.xml; module-directory solr/contrib/analysis-extras, build directory build, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), lucene-analyzers-common, lucene-analyzers-icu, lucene-analyzers-smartcn, lucene-analyzers-stempel, lucene-core, lucene-test-framework (test-jar, test), junit (test); output under ${build-directory}/classes and ${build-directory}/test-classes, sources src/java and src/test, test resources from test-files and ../../src/test-files; maven-surefire-plugin pointed at ../../../../testlogging.properties]
diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml.template b/dev-tools/maven/solr/contrib/clustering/pom.xml.template
new file mode 100644
index 00000000000..3b47b219e92
@@ -0,0 +1,103 @@
+ [POM template for solr-clustering, "Apache Solr Clustering" (jar): parent solr-parent; module-directory solr/contrib/clustering, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), solr-solrj, lucene-analyzers-common, lucene-test-framework (test-jar, test), org.carrot2:carrot2-core, junit (test); test resources from src/test/resources and ../../src/test-files; maven-surefire-plugin pointed at ../../../../testlogging.properties]
diff --git a/solr/contrib/clustering/solr-clustering-pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
similarity index 58%
rename from solr/contrib/clustering/solr-clustering-pom.xml.template
rename to dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
index ac066e01afe..72b5a1ccb0a 100644
- [old solr-clustering POM: solr-parent parent, "Apache Solr Clustering", dependencies on solr-solrj, solr-core and org.carrot2:carrot2-mini 3.1.0, plus the carrot2.org repository]
+ [the template becomes solr-dataimporthandler-aggregator, "Apache Solr DataImportHandler aggregator POM" (pom packaging): parent solr-parent via ../../pom.xml; modules src and src/extras; build directory target/solr-dataimporthandler-aggregator; maven-deploy-plugin disabled]
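The Solr contrib templates all point the test JVM at the shared testlogging.properties file through a surefire system property. The relative path is recoverable from the text; the property name java.util.logging.config.file is an assumption based on how Solr's tests configure java.util.logging, and is not recovered markup.

<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-surefire-plugin</artifactId>
  <configuration>
    <systemPropertyVariables>
      <!-- property name assumed; the path is relative to the module directory -->
      <java.util.logging.config.file>../../../../testlogging.properties</java.util.logging.config.file>
    </systemPropertyVariables>
  </configuration>
</plugin>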
diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template
new file mode 100644
index 00000000000..739465af469
@@ -0,0 +1,132 @@
+ [POM template for solr-dataimporthandler-extras, "Apache Solr DataImportHandler Extras" (jar): parent solr-parent via ../../../../pom.xml; module-directory solr/contrib/dataimporthandler/src/extras, build directory ../../target/extras, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), solr-dataimporthandler, solr-dataimporthandler (test-jar, test), solr-solrj, lucene-test-framework (test-jar, test), commons-io, activation, mail, tika-parsers, slf4j-api, xercesImpl, junit (test); sources main/java and test/java, test resources from test/resources and ../../../../src/test-files; maven-surefire-plugin pointed at ../../../../../../testlogging.properties]
diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template
new file mode 100644
index 00000000000..e0ea149c3c4
@@ -0,0 +1,129 @@
+ [POM template for solr-dataimporthandler, "Apache Solr DataImportHandler" (jar): parent solr-parent via ../../../pom.xml; module-directory solr/contrib/dataimporthandler, build directory ../target, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), solr-solrj, lucene-analyzers-common, lucene-test-framework (test-jar, test), geronimo-stax-api_1.0_spec, commons-io, slf4j-api, junit (test), easymock (test); sources main/java and test/java, test resources from test/resources and ../../../src/test-files; maven-jar-plugin builds a test-jar; maven-surefire-plugin pointed at ../../../../../testlogging.properties]
diff --git a/dev-tools/maven/solr/contrib/extraction/pom.xml.template b/dev-tools/maven/solr/contrib/extraction/pom.xml.template
new file mode 100644
index 00000000000..6d76eaece14
@@ -0,0 +1,114 @@
+ [POM template for solr-cell, "Apache Solr Content Extraction Library" (jar): "Apache Solr Content Extraction Library integrates Apache Tika content extraction framework into Solr"; parent solr-parent; module-directory solr/contrib/extraction, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), solr-solrj, lucene-analyzers-common, lucene-test-framework (test-jar, test), com.ibm.icu:icu4j, org.apache.tika:tika-parsers, xerces:xercesImpl, junit (test); test resources from src/test/resources and ../../src/test-files; maven-surefire-plugin pointed at ../../../../testlogging.properties]
diff --git a/solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template
similarity index 55%
rename from solr/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template
rename to dev-tools/maven/solr/contrib/pom.xml.template
index 287479c73cb..e4a731aa6a7 100644
- [old solr-dataimporthandler-extras POM: solr-parent parent, "Apache Solr DataImportHandler Extras", dependencies on activation 1.1 and mail 1.4.1]
+ [the template becomes solr-contrib-aggregator, "Apache Solr Contrib aggregator POM" (pom packaging): parent solr-parent via ../pom.xml; modules analysis-extras, clustering, dataimporthandler, extraction, uima; build directory ../build/solr-contrib-aggregator; maven-deploy-plugin disabled]
diff --git a/dev-tools/maven/solr/contrib/uima/pom.xml.template b/dev-tools/maven/solr/contrib/uima/pom.xml.template
new file mode 100644
index 00000000000..ef314604ba1
@@ -0,0 +1,128 @@
+ [POM template for solr-uima, "Apache Solr - UIMA integration" (jar): parent solr-parent via ../../pom.xml; module-directory solr/contrib/uima, specification version 4.0; dependencies: solr-core, solr-core (test-jar, test), solr-solrj, lucene-test-framework (test-jar, test), commons-digester, commons-lang, uima-alchemy-annotator, uima-OpenCalaisAnnotator, uima-Tagger, uima-WhitespaceTokenizer, uima-uimaj-core, slf4j-simple, junit (test); resources from src/main/resources, test resources from src/test/resources; maven-surefire-plugin pointed at ../../../../testlogging.properties]
diff --git a/solr/src/maven/solr-parent-pom.xml.template b/dev-tools/maven/solr/pom.xml.template
similarity index 51%
rename from solr/src/maven/solr-parent-pom.xml.template
rename to dev-tools/maven/solr/pom.xml.template
index 211f32de862..f866ca184d5 100644
- [old solr-parent POM: a standalone parent under org.apache:apache:4 with Solr-specific mailing lists (Solr User, Solr Developer, Solr Commits), inception year 2006, an Apache 2 license block and its own svn scm entries]
+ [the template becomes the new solr-parent POM, "Apache Solr parent POM" (pom packaging): parent lucene-solr-grandparent via ../pom.xml; modules src, src/solrj, src/webapp, contrib; Java compatibility property 1.6; JIRA issue management (SOLR), Hudson CI, the Solr User list plus the shared Java Developer and Java Commits lists; build directory build/solr-parent; maven-javadoc-plugin window and doc titles set to "${project.name} ${project.version} API (${now.version})"]
diff --git a/dev-tools/maven/solr/src/pom.xml.template b/dev-tools/maven/solr/src/pom.xml.template
new file mode 100644
index 00000000000..ec3d6258643
@@ -0,0 +1,252 @@
+ [POM template for solr-core, "Apache Solr Core" (jar): parent solr-parent via ../pom.xml; module-directory solr, build directory ../build, specification version 4.0; dependencies: solr-solrj, solr-noggit, lucene-test-framework (test-jar, test), lucene-analyzers-common, lucene-analyzers-phonetic, lucene-highlighter, lucene-memory, lucene-misc, lucene-queries, lucene-spatial, lucene-spellchecker, solr-commons-csv, geronimo-stax-api_1.0_spec, commons-codec, commons-fileupload, commons-httpclient, commons-io, commons-lang, velocity, velocity-tools, jetty, jetty-util, jsp-2.1-jetty (provided), slf4j-api, slf4j-jdk14, javax.servlet:servlet-api (provided), junit (test), easymock (test); output under ${build-directory}/solr and ${build-directory}/tests, sources java and test, test resources from test-files; build-helper-maven-plugin adds webapp/src as an extra source root; maven-jar-plugin builds a test-jar; maven-surefire-plugin pointed at ../../../testlogging.properties; appassembler-maven-plugin programs JettySolrRunner, BitSetPerf (with -Xms128m -Xbatch), SimplePostTool, SuggestMissingFactories]
a/dev-tools/maven/solr/src/solrj/pom.xml.template b/dev-tools/maven/solr/src/solrj/pom.xml.template new file mode 100644 index 00000000000..e4ed4c7cf2f --- /dev/null +++ b/dev-tools/maven/solr/src/solrj/pom.xml.template @@ -0,0 +1,135 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-solrj + jar + Apache Solr Solrj + Apache Solr Solrj + + solr/src/solrj + ../../build/solrj + 4.0 + + + + org.apache.lucene + lucene-core + ${project.version} + + + org.apache.lucene + lucene-test-framework + ${project.version} + test-jar + test + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + test + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + + + org.apache.zookeeper + zookeeper + + + javax.jms + jms + + + com.sun.jmx + jmxri + + + com.sun.jdmk + jmxtools + + + + + commons-httpclient + commons-httpclient + + + commons-io + commons-io + + + org.slf4j + slf4j-api + + + junit + junit + test + + + + ${build-directory} + ${build-directory} + . + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + ../common + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + + + true + + + + + diff --git a/dev-tools/maven/solr/src/webapp/pom.xml.template b/dev-tools/maven/solr/src/webapp/pom.xml.template new file mode 100644 index 00000000000..3985e0fa4b1 --- /dev/null +++ b/dev-tools/maven/solr/src/webapp/pom.xml.template @@ -0,0 +1,191 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr + war + Apache Solr Search Server + Apache Solr Search Server + + solr/src/webapp + ../../build/web + + + + ${project.groupId} + solr-core + ${project.version} + + + commons-chain + commons-chain + + + commons-digester + commons-digester + + + commons-logging + commons-logging + + + commons-validator + commons-validator + + + jakarta-regexp + jakarta-regexp + + + dom4j + dom4j + + + log4j + log4j + + + javax.activation + activation + + + javax.mail + mail + + + jline + jline + + + org.apache.struts + struts-core + + + org.apache.struts + struts-taglib + + + org.apache.struts + struts-tiles + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + oro + oro + + + sslext + sslext + + + xml-apis + xml-apis + + + + + ${project.groupId} + solr-dataimporthandler + ${project.version} + + + org.codehaus.woodstox + wstx-asl + + + org.slf4j + jcl-over-slf4j + + + org.slf4j + log4j-over-slf4j + + + + ${build-directory} + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + false + + + + org.apache.maven.plugins + maven-war-plugin + + web + web/WEB-INF/web.xml + + + ../../contrib/dataimporthandler/src/main/webapp + + + + + + org.mortbay.jetty + maven-jetty-plugin + + 10 + + + 8080 + 60000 + + + + / + + web,../../contrib/dataimporthandler/src/main/webapp + + + + + + + diff --git a/dev-tools/prettify/lang-apollo.js b/dev-tools/prettify/lang-apollo.js new file mode 100644 index 00000000000..40420308094 --- /dev/null +++ b/dev-tools/prettify/lang-apollo.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_COMMENT,/^#[^\r\n]*/,null,'#'],[PR.PR_PLAIN,/^[\t\n\r \xA0]+/,null,' \n\r 
\xa0'],[PR.PR_STRING,/^\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)/,null,'\"']],[[PR.PR_KEYWORD,/^(?:ADS|AD|AUG|BZF|BZMF|CAE|CAF|CA|CCS|COM|CS|DAS|DCA|DCOM|DCS|DDOUBL|DIM|DOUBLE|DTCB|DTCF|DV|DXCH|EDRUPT|EXTEND|INCR|INDEX|NDX|INHINT|LXCH|MASK|MSK|MP|MSU|NOOP|OVSK|QXCH|RAND|READ|RELINT|RESUME|RETURN|ROR|RXOR|SQUARE|SU|TCR|TCAA|OVSK|TCF|TC|TS|WAND|WOR|WRITE|XCH|XLQ|XXALQ|ZL|ZQ|ADD|ADZ|SUB|SUZ|MPY|MPR|MPZ|DVP|COM|ABS|CLA|CLZ|LDQ|STO|STQ|ALS|LLS|LRS|TRA|TSQ|TMI|TOV|AXT|TIX|DLY|INP|OUT)\s/,null],[PR.PR_TYPE,/^(?:-?GENADR|=MINUS|2BCADR|VN|BOF|MM|-?2CADR|-?[1-6]DNADR|ADRES|BBCON|[SE]?BANK\=?|BLOCK|BNKSUM|E?CADR|COUNT\*?|2?DEC\*?|-?DNCHAN|-?DNPTR|EQUALS|ERASE|MEMORY|2?OCT|REMADR|SETLOC|SUBRO|ORG|BSS|BES|SYN|EQU|DEFINE|END)\s/,null],[PR.PR_LITERAL,/^\'(?:-*(?:\w|\\[\x21-\x7e])(?:[\w-]*|\\[\x21-\x7e])[=!?]?)?/],[PR.PR_PLAIN,/^-*(?:[!-z_]|\\[\x21-\x7e])(?:[\w-]*|\\[\x21-\x7e])[=!?]?/i],[PR.PR_PUNCTUATION,/^[^\w\t\n\r \xA0()\"\\\';]+/]]),['apollo','agc','aea']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-css.js b/dev-tools/prettify/lang-css.js new file mode 100644 index 00000000000..c650d8f0fdb --- /dev/null +++ b/dev-tools/prettify/lang-css.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[ \t\r\n\f]+/,null,' \r\n ']],[[PR.PR_STRING,/^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/,null],[PR.PR_STRING,/^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/,null],['lang-css-str',/^url\(([^\)\"\']*)\)/i],[PR.PR_KEYWORD,/^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-\w]|$)/i,null],['lang-css-kw',/^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|\\(?:\\[0-9a-f]+ ?))*)\s*:/i],[PR.PR_COMMENT,/^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//],[PR.PR_COMMENT,/^(?:)/],[PR.PR_LITERAL,/^(?:\d+|\d*\.\d+)(?:%|[a-z]+)?/i],[PR.PR_LITERAL,/^#(?:[0-9a-f]{3}){1,2}/i],[PR.PR_PLAIN,/^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i],[PR.PR_PUNCTUATION,/^[^\s\w\'\"]+/]]),['css']),PR.registerLangHandler(PR.createSimpleLexer([],[[PR.PR_KEYWORD,/^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i]]),['css-kw']),PR.registerLangHandler(PR.createSimpleLexer([],[[PR.PR_STRING,/^[^\)\"\']+/]]),['css-str']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-hs.js b/dev-tools/prettify/lang-hs.js new file mode 100644 index 00000000000..27b221acd7c --- /dev/null +++ b/dev-tools/prettify/lang-hs.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t\n\x0B\x0C\r ]+/,null,' \n \r '],[PR.PR_STRING,/^\"(?:[^\"\\\n\x0C\r]|\\[\s\S])*(?:\"|$)/,null,'\"'],[PR.PR_STRING,/^\'(?:[^\'\\\n\x0C\r]|\\[^&])\'?/,null,'\''],[PR.PR_LITERAL,/^(?:0o[0-7]+|0x[\da-f]+|\d+(?:\.\d+)?(?:e[+\-]?\d+)?)/i,null,'0123456789']],[[PR.PR_COMMENT,/^(?:(?:--+(?:[^\r\n\x0C]*)?)|(?:\{-(?:[^-]|-+[^-\}])*-\}))/],[PR.PR_KEYWORD,/^(?:case|class|data|default|deriving|do|else|if|import|in|infix|infixl|infixr|instance|let|module|newtype|of|then|type|where|_)(?=[^a-zA-Z0-9\']|$)/,null],[PR.PR_PLAIN,/^(?:[A-Z][\w\']*\.)*[a-zA-Z][\w\']*/],[PR.PR_PUNCTUATION,/^[^\t\n\x0B\x0C\r a-zA-Z0-9\'\"]+/]]),['hs']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-lisp.js b/dev-tools/prettify/lang-lisp.js new file mode 100644 index 00000000000..85c6c23d0d3 --- /dev/null +++ b/dev-tools/prettify/lang-lisp.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([['opn',/^\(/,null,'('],['clo',/^\)/,null,')'],[PR.PR_COMMENT,/^;[^\r\n]*/,null,';'],[PR.PR_PLAIN,/^[\t\n\r \xA0]+/,null,' \n\r 
\xa0'],[PR.PR_STRING,/^\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)/,null,'\"']],[[PR.PR_KEYWORD,/^(?:block|c[ad]+r|catch|cons|defun|do|eq|eql|equal|equalp|eval-when|flet|format|go|if|labels|lambda|let|load-time-value|locally|macrolet|multiple-value-call|nil|progn|progv|quote|require|return-from|setq|symbol-macrolet|t|tagbody|the|throw|unwind)\b/,null],[PR.PR_LITERAL,/^[+\-]?(?:0x[0-9a-f]+|\d+\/\d+|(?:\.\d+|\d+(?:\.\d*)?)(?:[ed][+\-]?\d+)?)/i],[PR.PR_LITERAL,/^\'(?:-*(?:\w|\\[\x21-\x7e])(?:[\w-]*|\\[\x21-\x7e])[=!?]?)?/],[PR.PR_PLAIN,/^-*(?:[a-z_]|\\[\x21-\x7e])(?:[\w-]*|\\[\x21-\x7e])[=!?]?/i],[PR.PR_PUNCTUATION,/^[^\w\t\n\r \xA0()\"\\\';]+/]]),['cl','el','lisp','scm']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-lua.js b/dev-tools/prettify/lang-lua.js new file mode 100644 index 00000000000..d107bab01eb --- /dev/null +++ b/dev-tools/prettify/lang-lua.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t\n\r \xA0]+/,null,' \n\r \xa0'],[PR.PR_STRING,/^(?:\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)|\'(?:[^\'\\]|\\[\s\S])*(?:\'|$))/,null,'\"\'']],[[PR.PR_COMMENT,/^--(?:\[(=*)\[[\s\S]*?(?:\]\1\]|$)|[^\r\n]*)/],[PR.PR_STRING,/^\[(=*)\[[\s\S]*?(?:\]\1\]|$)/],[PR.PR_KEYWORD,/^(?:and|break|do|else|elseif|end|false|for|function|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b/,null],[PR.PR_LITERAL,/^[+-]?(?:0x[\da-f]+|(?:(?:\.\d+|\d+(?:\.\d*)?)(?:e[+\-]?\d+)?))/i],[PR.PR_PLAIN,/^[a-z_]\w*/i],[PR.PR_PUNCTUATION,/^[^\w\t\n\r \xA0][^\w\t\n\r \xA0\"\'\-\+=]*/]]),['lua']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-ml.js b/dev-tools/prettify/lang-ml.js new file mode 100644 index 00000000000..698d6de4e98 --- /dev/null +++ b/dev-tools/prettify/lang-ml.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t\n\r \xA0]+/,null,' \n\r \xa0'],[PR.PR_COMMENT,/^#(?:if[\t\n\r \xA0]+(?:[a-z_$][\w\']*|``[^\r\n\t`]*(?:``|$))|else|endif|light)/i,null,'#'],[PR.PR_STRING,/^(?:\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)|\'(?:[^\'\\]|\\[\s\S])*(?:\'|$))/,null,'\"\'']],[[PR.PR_COMMENT,/^(?:\/\/[^\r\n]*|\(\*[\s\S]*?\*\))/],[PR.PR_KEYWORD,/^(?:abstract|and|as|assert|begin|class|default|delegate|do|done|downcast|downto|elif|else|end|exception|extern|false|finally|for|fun|function|if|in|inherit|inline|interface|internal|lazy|let|match|member|module|mutable|namespace|new|null|of|open|or|override|private|public|rec|return|static|struct|then|to|true|try|type|upcast|use|val|void|when|while|with|yield|asr|land|lor|lsl|lsr|lxor|mod|sig|atomic|break|checked|component|const|constraint|constructor|continue|eager|event|external|fixed|functor|global|include|method|mixin|object|parallel|process|protected|pure|sealed|trait|virtual|volatile)\b/],[PR.PR_LITERAL,/^[+\-]?(?:0x[\da-f]+|(?:(?:\.\d+|\d+(?:\.\d*)?)(?:e[+\-]?\d+)?))/i],[PR.PR_PLAIN,/^(?:[a-z_]\w*[!?#]?|``[^\r\n\t`]*(?:``|$))/i],[PR.PR_PUNCTUATION,/^[^\t\n\r \xA0\"\'\w]+/]]),['fs','ml']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-proto.js b/dev-tools/prettify/lang-proto.js new file mode 100644 index 00000000000..e67967f3e64 --- /dev/null +++ b/dev-tools/prettify/lang-proto.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.sourceDecorator({keywords:'bool bytes default double enum extend extensions false fixed32 fixed64 float group import int32 int64 max message option optional package repeated required returns rpc service sfixed32 sfixed64 sint32 sint64 string syntax to true uint32 uint64',cStyleComments:true}),['proto']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-sql.js 
b/dev-tools/prettify/lang-sql.js new file mode 100644 index 00000000000..ff381cd4a54 --- /dev/null +++ b/dev-tools/prettify/lang-sql.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t\n\r \xA0]+/,null,' \n\r \xa0'],[PR.PR_STRING,/^(?:"(?:[^\"\\]|\\.)*"|'(?:[^\'\\]|\\.)*')/,null,'\"\'']],[[PR.PR_COMMENT,/^(?:--[^\r\n]*|\/\*[\s\S]*?(?:\*\/|$))/],[PR.PR_KEYWORD,/^(?:ADD|ALL|ALTER|AND|ANY|AS|ASC|AUTHORIZATION|BACKUP|BEGIN|BETWEEN|BREAK|BROWSE|BULK|BY|CASCADE|CASE|CHECK|CHECKPOINT|CLOSE|CLUSTERED|COALESCE|COLLATE|COLUMN|COMMIT|COMPUTE|CONSTRAINT|CONTAINS|CONTAINSTABLE|CONTINUE|CONVERT|CREATE|CROSS|CURRENT|CURRENT_DATE|CURRENT_TIME|CURRENT_TIMESTAMP|CURRENT_USER|CURSOR|DATABASE|DBCC|DEALLOCATE|DECLARE|DEFAULT|DELETE|DENY|DESC|DISK|DISTINCT|DISTRIBUTED|DOUBLE|DROP|DUMMY|DUMP|ELSE|END|ERRLVL|ESCAPE|EXCEPT|EXEC|EXECUTE|EXISTS|EXIT|FETCH|FILE|FILLFACTOR|FOR|FOREIGN|FREETEXT|FREETEXTTABLE|FROM|FULL|FUNCTION|GOTO|GRANT|GROUP|HAVING|HOLDLOCK|IDENTITY|IDENTITYCOL|IDENTITY_INSERT|IF|IN|INDEX|INNER|INSERT|INTERSECT|INTO|IS|JOIN|KEY|KILL|LEFT|LIKE|LINENO|LOAD|NATIONAL|NOCHECK|NONCLUSTERED|NOT|NULL|NULLIF|OF|OFF|OFFSETS|ON|OPEN|OPENDATASOURCE|OPENQUERY|OPENROWSET|OPENXML|OPTION|OR|ORDER|OUTER|OVER|PERCENT|PLAN|PRECISION|PRIMARY|PRINT|PROC|PROCEDURE|PUBLIC|RAISERROR|READ|READTEXT|RECONFIGURE|REFERENCES|REPLICATION|RESTORE|RESTRICT|RETURN|REVOKE|RIGHT|ROLLBACK|ROWCOUNT|ROWGUIDCOL|RULE|SAVE|SCHEMA|SELECT|SESSION_USER|SET|SETUSER|SHUTDOWN|SOME|STATISTICS|SYSTEM_USER|TABLE|TEXTSIZE|THEN|TO|TOP|TRAN|TRANSACTION|TRIGGER|TRUNCATE|TSEQUAL|UNION|UNIQUE|UPDATE|UPDATETEXT|USE|USER|VALUES|VARYING|VIEW|WAITFOR|WHEN|WHERE|WHILE|WITH|WRITETEXT)(?=[^\w-]|$)/i,null],[PR.PR_LITERAL,/^[+-]?(?:0x[\da-f]+|(?:(?:\.\d+|\d+(?:\.\d*)?)(?:e[+\-]?\d+)?))/i],[PR.PR_PLAIN,/^[a-z_][\w-]*/i],[PR.PR_PUNCTUATION,/^[^\w\t\n\r \xA0\"\'][^\w\t\n\r \xA0+\-\"\']*/]]),['sql']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-vb.js b/dev-tools/prettify/lang-vb.js new file mode 100644 index 00000000000..cabce853999 --- /dev/null +++ b/dev-tools/prettify/lang-vb.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t\n\r \xA0\u2028\u2029]+/,null,' \n\r 
\xa0\u2028\u2029'],[PR.PR_STRING,/^(?:[\"\u201C\u201D](?:[^\"\u201C\u201D]|[\"\u201C\u201D]{2})(?:[\"\u201C\u201D]c|$)|[\"\u201C\u201D](?:[^\"\u201C\u201D]|[\"\u201C\u201D]{2})*(?:[\"\u201C\u201D]|$))/i,null,'\"\u201c\u201d'],[PR.PR_COMMENT,/^[\'\u2018\u2019][^\r\n\u2028\u2029]*/,null,'\'\u2018\u2019']],[[PR.PR_KEYWORD,/^(?:AddHandler|AddressOf|Alias|And|AndAlso|Ansi|As|Assembly|Auto|Boolean|ByRef|Byte|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|CDbl|CDec|Char|CInt|Class|CLng|CObj|Const|CShort|CSng|CStr|CType|Date|Decimal|Declare|Default|Delegate|Dim|DirectCast|Do|Double|Each|Else|ElseIf|End|EndIf|Enum|Erase|Error|Event|Exit|Finally|For|Friend|Function|Get|GetType|GoSub|GoTo|Handles|If|Implements|Imports|In|Inherits|Integer|Interface|Is|Let|Lib|Like|Long|Loop|Me|Mod|Module|MustInherit|MustOverride|MyBase|MyClass|Namespace|New|Next|Not|NotInheritable|NotOverridable|Object|On|Option|Optional|Or|OrElse|Overloads|Overridable|Overrides|ParamArray|Preserve|Private|Property|Protected|Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|Return|Select|Set|Shadows|Shared|Short|Single|Static|Step|Stop|String|Structure|Sub|SyncLock|Then|Throw|To|Try|TypeOf|Unicode|Until|Variant|Wend|When|While|With|WithEvents|WriteOnly|Xor|EndIf|GoSub|Let|Variant|Wend)\b/i,null],[PR.PR_COMMENT,/^REM[^\r\n\u2028\u2029]*/i],[PR.PR_LITERAL,/^(?:True\b|False\b|Nothing\b|\d+(?:E[+\-]?\d+[FRD]?|[FRDSIL])?|(?:&H[0-9A-F]+|&O[0-7]+)[SIL]?|\d*\.\d+(?:E[+\-]?\d+)?[FRD]?|#\s+(?:\d+[\-\/]\d+[\-\/]\d+(?:\s+\d+:\d+(?::\d+)?(\s*(?:AM|PM))?)?|\d+:\d+(?::\d+)?(\s*(?:AM|PM))?)\s+#)/i],[PR.PR_PLAIN,/^(?:(?:[a-z]|_\w)\w*|\[(?:[a-z]|_\w)\w*\])/i],[PR.PR_PUNCTUATION,/^[^\w\t\n\r \"\'\[\]\xA0\u2018\u2019\u201C\u201D\u2028\u2029]+/],[PR.PR_PUNCTUATION,/^(?:\[|\])/]]),['vb','vbs']) \ No newline at end of file diff --git a/dev-tools/prettify/lang-wiki.js b/dev-tools/prettify/lang-wiki.js new file mode 100644 index 00000000000..00a1b6b4bf2 --- /dev/null +++ b/dev-tools/prettify/lang-wiki.js @@ -0,0 +1 @@ +PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_PLAIN,/^[\t \xA0a-gi-z0-9]+/,null,' \xa0abcdefgijklmnopqrstuvwxyz0123456789'],[PR.PR_PUNCTUATION,/^[=*~\^\[\]]+/,null,'=*~^[]']],[['lang-wiki.meta',/(?:^^|\r\n?|\n)(#[a-z]+)\b/],[PR.PR_LITERAL,/^(?:[A-Z][a-z][a-z0-9]+[A-Z][a-z][a-zA-Z0-9]+)\b/],['lang-',/^\{\{\{([\s\S]+?)\}\}\}/],['lang-',/^`([^\r\n`]+)`/],[PR.PR_STRING,/^https?:\/\/[^\/?#\s]*(?:\/[^?#\s]*)?(?:\?[^#\s]*)?(?:#\S*)?/i],[PR.PR_PLAIN,/^(?:\r\n|[\s\S])[^#=*~^A-Zh\{`\[\r\n]*/]]),['wiki']),PR.registerLangHandler(PR.createSimpleLexer([[PR.PR_KEYWORD,/^#[a-z]+/i,null,'#']],[]),['wiki.meta']) \ No newline at end of file diff --git a/dev-tools/prettify/prettify.css b/dev-tools/prettify/prettify.css new file mode 100644 index 00000000000..2eb91bf01a9 --- /dev/null +++ b/dev-tools/prettify/prettify.css @@ -0,0 +1 @@ +.str,.atv{color:#080}.kwd,.tag{color:#008}.com{color:#800}.typ,.atn,.dec{color:#606}.lit{color:#066}.pun{color:#660}.pln{color:#000}pre.prettyprint{padding:2px;border:1px solid #888}@media print{.str{color:#060}.kwd,.tag{color:#006;font-weight:bold}.com{color:#600;font-style:italic}.typ{font-weight:bold}.lit{color:#044}.pun{color:#440}.atn,.typ{color:#404}.atv{color:#060}} \ No newline at end of file diff --git a/dev-tools/prettify/prettify.js b/dev-tools/prettify/prettify.js new file mode 100644 index 00000000000..29b5e738e45 --- /dev/null +++ b/dev-tools/prettify/prettify.js @@ -0,0 +1,46 @@ 
+window.PR_SHOULD_USE_CONTINUATION=true,window.PR_TAB_WIDTH=8,window.PR_normalizedHtml=window.PR=window.prettyPrintOne=window.prettyPrint=void +0,window._pr_isIE6=function(){var a=navigator&&navigator.userAgent&&navigator.userAgent.match(/\bMSIE ([678])\./);return a=a?+a[1]:false,window._pr_isIE6=function(){return a},a},(function(){var +a=true,b=null,c='break continue do else for if return while auto case char const default double enum extern float goto int long register short signed sizeof static struct switch typedef union unsigned void volatile catch class delete false import new operator private protected public this throw true try typeof ',d=c+'alignof align_union asm axiom bool '+'concept concept_map const_cast constexpr decltype '+'dynamic_cast explicit export friend inline late_check '+'mutable namespace nullptr reinterpret_cast static_assert static_cast '+'template typeid typename using virtual wchar_t where ',e=c+'abstract boolean byte extends final finally implements import '+'instanceof null native package strictfp super synchronized throws '+'transient ',f=e+'as base by checked decimal delegate descending event '+'fixed foreach from group implicit in interface internal into is lock '+'object out override orderby params partial readonly ref sbyte sealed '+'stackalloc string select uint ulong unchecked unsafe ushort var ',g=c+'debugger eval export function get null set undefined var with '+'Infinity NaN ',h='caller delete die do dump elsif eval exit foreach for goto if import last local my next no our print package redo require sub undef unless until use wantarray while BEGIN END ',i='break continue do else for if return while and as assert class def del elif except exec finally from global import in is lambda nonlocal not or pass print raise try with yield False True None ',j='break continue do else for if return while alias and begin case class def defined elsif end ensure false in module next nil not or redo rescue retry self super then true undef unless until when yield BEGIN END ',k='break continue do else for if return while case done elif esac eval fi function in local set then until ',l=d+f+g+h+i+j+k,m=(function(){var +a=['!','!=','!==','#','%','%=','&','&&','&&=','&=','(','*','*=','+=',',','-=','->','/','/=',':','::',';','<','<<','<<=','<=','=','==','===','>','>=','>>','>>=','>>>','>>>=','?','@','[','^','^=','^^','^^=','{','|','|=','||','||=','~','break','case','continue','delete','do','else','finally','instanceof','return','throw','try','typeof'],b='(?:^^|[+-]',c;for(c=0;c:&a-z])/g,'\\$1');return b+=')\\s*',b})(),n=/&/g,o=//g,q=/\"/g,r,s,t,u,v,w,x,y,z,A,B,C,D,E,F;function +G(a){return a.replace(n,'&').replace(o,'<').replace(p,'>').replace(q,'"')}function +H(a){return a.replace(n,'&').replace(o,'<').replace(p,'>')}C=/</g,B=/>/g,w=/'/g,E=/"/g,v=/&/g,D=/ /g;function +I(a){var b=a.indexOf('&'),c,d,e,f;if(b<0)return a;for(--b;(b=a.indexOf('&#',b+1))>=0;)d=a.indexOf(';',b),d>=0&&(e=a.substring(b+3,d),f=10,e&&e.charAt(0)==='x'&&(e=e.substring(1),f=16),c=parseInt(e,f),isNaN(c)||(a=a.substring(0,b)+String.fromCharCode(c)+a.substring(d+1)));return a.replace(C,'<').replace(B,'>').replace(w,'\'').replace(E,'\"').replace(D,' ').replace(v,'&')}function +J(a){return'XMP'===a.tagName}u=/[\r\n]/g;function K(c,d){var e;return'PRE'===c.tagName?a:u.test(d)?(e='',c.currentStyle?(e=c.currentStyle.whiteSpace):window.getComputedStyle&&(e=window.getComputedStyle(c,b).whiteSpace),!e||e==='pre'):a}function +L(a,b){var c,d,e,f;switch(a.nodeType){case 
1:f=a.tagName.toLowerCase(),b.push('<',f);for(e=0;e');for(d=a.firstChild;d;d=d.nextSibling)L(d,b);(a.firstChild||!/^(?:br|link|img)$/.test(f))&&b.push('');break;case +2:b.push(a.name.toLowerCase(),'=\"',G(a.value),'\"');break;case 3:case 4:b.push(H(a.nodeValue))}}function +M(b){var c=0,d=false,e=false,f,g,h,i;for(f=0,g=b.length;f122||(g<65||q>90||d.push([Math.max(65,q)|32,Math.min(g,90)|32]),g<97||q>122||d.push([Math.max(97,q)&-33,Math.min(g,122)&-33]))}d.sort(function(a,b){return a[0]-b[0]||b[1]-a[1]}),f=[],i=[NaN,NaN];for(h=0;hp[0]&&(p[1]+1>p[0]&&n.push('-'),n.push(k(p[1])));return n.push(']'),n.join('')}function +m(a){var b=a.source.match(new RegExp('(?:\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]|\\\\u[A-Fa-f0-9]{4}|\\\\x[A-Fa-f0-9]{2}|\\\\[0-9]+|\\\\[^ux0-9]|\\(\\?[:!=]|[\\(\\)\\^]|[^\\x5B\\x5C\\(\\)\\^]+)','g')),e=b.length,f=[],g,h,i,j,k;for(j=0,i=0;j=2&&g==='['?(b[j]=l(k)):g!=='\\'&&(b[j]=k.replace(/[a-zA-Z]/g,function(a){var +b=a.charCodeAt(0);return'['+String.fromCharCode(b&-33,b|32)+']'}));return b.join('')}i=[];for(f=0,g=b.length;f\n')),r=!/)[\r\n]+/g,'$1').replace(/(?:[\r\n]+[ \t]*)+/g,' ')),d;e=[];for(c=a.firstChild;c;c=c.nextSibling)L(c,e);return e.join('')}function +O(a){var c=0;return function(d){var e=b,f=0,g,h,i,j;for(h=0,i=d.length;h=0;j-=' '.length)e.push(' '.substring(0,j));f=h+1;break;case'\n':c=0;break;default:++c}}return e?(e.push(d.substring(f)),e.join('')):d}}z=new +RegExp('[^<]+|||\"\']|\'[^\']*\'|\"[^\"]*\")*>|<','g'),A=/^<\!--/,y=/^1&&j.charAt(0)==='<'){if(A.test(j))continue;if(y.test(j))c.push(j.substring(9,j.length-3)),d+=j.length-12;else +if(x.test(j))c.push('\n'),++d;else if(j.indexOf('nocode')>=0&&Q(j)){l=(j.match(F))[2],f=1;for(h=g+1;h=0;)d[o.charAt(i)]=m;n=m[1],k=''+n,g.hasOwnProperty(k)||(f.push(n),g[k]=b)}f.push(/[\0-\uffff]/),h=M(f)})(),f=c.length,g=/\S/,e=function(a){var +b=a.source,g=a.basePos,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y;i=[g,'pln'],s=0,y=b.match(h)||[],u={};for(v=0,q=y.length;v=5&&'lang-'===t.substring(0,5),n&&!(p&&typeof +p[1]==='string')&&(n=false,t='src'),n||(u[w]=t)}x=s,s+=w.length,n?(j=p[1],l=w.indexOf(j),k=l+j.length,p[2]&&(k=w.length-p[2].length,l=k-j.length),o=t.substring(5),R(g+x,w.substring(0,l),e,i),R(g+x+l,j,W(o,j),i),R(g+x+k,w.substring(k),e,i)):i.push(g+x,t)}a.decorations=i},e}function +T(a){var c=[],d=[],e,f;return a.tripleQuotedStrings?c.push(['str',/^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,b,'\'\"']):a.multiLineStrings?c.push(['str',/^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,b,'\'\"`']):c.push(['str',/^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,b,'\"\'']),a.verbatimStrings&&d.push(['str',/^@\"(?:[^\"]|\"\")*(?:\"|$)/,b]),a.hashComments&&(a.cStyleComments?(c.push(['com',/^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,b,'#']),d.push(['str',/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,b])):c.push(['com',/^#[^\r\n]*/,b,'#'])),a.cStyleComments&&(d.push(['com',/^\/\/[^\r\n]*/,b]),d.push(['com',/^\/\*[\s\S]*?(?:\*\/|$)/,b])),a.regexLiterals&&(e='/(?=[^/*])(?:[^/\\x5B\\x5C]|\\x5C[\\s\\S]|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+/',d.push(['lang-regex',new +RegExp('^'+m+'('+e+')')])),f=a.keywords.replace(/^\s+|\s+$/g,''),f.length&&d.push(['kwd',new +RegExp('^(?:'+f.replace(/\s+/g,'|')+')\\b'),b]),c.push(['pln',/^\s+/,b,' \r\n 
\xa0']),d.push(['lit',/^@[a-z_$][a-z_$@0-9]*/i,b],['typ',/^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/,b],['pln',/^[a-z_$][a-z_$@0-9]*/i,b],['lit',new +RegExp('^(?:0x[a-f0-9]+|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)(?:e[+\\-]?\\d+)?)[a-z]*','i'),b,'0123456789'],['pun',/^.[^\s\w\.$@\'\"\`\/\#]*/,b]),S(c,d)}s=T({keywords:l,hashComments:a,cStyleComments:a,multiLineStrings:a,regexLiterals:a});function +U(c){var d=c.source,e=c.extractedTags,f=c.decorations,g=[],h=0,i=b,j=b,k=0,l=0,m=O(window.PR_TAB_WIDTH),n=/([\r\n ]) /g,o=/(^| ) /gm,p=/\r\n?|\n/g,q=/[ \r\n]$/,r=a,s;function +t(a){var c,e;a>h&&(i&&i!==j&&(g.push(''),i=b),!i&&j&&(i=j,g.push('')),c=H(m(d.substring(h,a))).replace(r?o:n,'$1 '),r=q.test(c),e=window._pr_isIE6()?' 
':'
',g.push(c.replace(p,e)),h=a)}while(a){k'),i=b),g.push(e[k+1]),k+=2;else +if(l'),c.prettyPrintedHtml=g.join('')}t={};function +V(a,b){var c,d;for(d=b.length;--d>=0;)c=b[d],t.hasOwnProperty(c)?'console'in window&&console.warn('cannot override language handler %s',c):(t[c]=a)}function +W(a,b){return a&&t.hasOwnProperty(a)||(a=/^\s*]*(?:>|$)/],['com',/^<\!--[\s\S]*?(?:-\->|$)/],['lang-',/^<\?([\s\S]+?)(?:\?>|$)/],['lang-',/^<%([\s\S]+?)(?:%>|$)/],['pun',/^(?:<[%?]|[%?]>)/],['lang-',/^]*>([\s\S]+?)<\/xmp\b[^>]*>/i],['lang-js',/^]*>([\s\S]*?)(<\/script\b[^>]*>)/i],['lang-css',/^]*>([\s\S]*?)(<\/style\b[^>]*>)/i],['lang-in.tag',/^(<\/?[a-z][^<>]*>)/i]]),['default-markup','htm','html','mxml','xhtml','xml','xsl']),V(S([['pln',/^[\s]+/,b,' \r\n'],['atv',/^(?:\"[^\"]*\"?|\'[^\']*\'?)/,b,'\"\'']],[['tag',/^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],['atn',/^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],['lang-uq.val',/^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],['pun',/^[=<>\/]+/],['lang-js',/^on\w+\s*=\s*\"([^\"]+)\"/i],['lang-js',/^on\w+\s*=\s*\'([^\']+)\'/i],['lang-js',/^on\w+\s*=\s*([^\"\'>\s]+)/i],['lang-css',/^style\s*=\s*\"([^\"]+)\"/i],['lang-css',/^style\s*=\s*\'([^\']+)\'/i],['lang-css',/^style\s*=\s*([^\"\'>\s]+)/i]]),['in.tag']),V(S([],[['atv',/^[\s\S]+/]]),['uq.val']),V(T({keywords:d,hashComments:a,cStyleComments:a}),['c','cc','cpp','cxx','cyc','m']),V(T({keywords:'null true false'}),['json']),V(T({keywords:f,hashComments:a,cStyleComments:a,verbatimStrings:a}),['cs']),V(T({keywords:e,cStyleComments:a}),['java']),V(T({keywords:k,hashComments:a,multiLineStrings:a}),['bsh','csh','sh']),V(T({keywords:i,hashComments:a,multiLineStrings:a,tripleQuotedStrings:a}),['cv','py']),V(T({keywords:h,hashComments:a,multiLineStrings:a,regexLiterals:a}),['perl','pl','pm']),V(T({keywords:j,hashComments:a,multiLineStrings:a,regexLiterals:a}),['rb']),V(T({keywords:g,cStyleComments:a,regexLiterals:a}),['js']),V(S([],[['str',/^[\s\S]+/]]),['regex']);function +X(a){var b=a.sourceCodeHtml,c=a.langExtension,d,e;a.prettyPrintedHtml=b;try{e=P(b),d=e.source,a.source=d,a.basePos=0,a.extractedTags=e.tags,W(c,d)(a),U(a)}catch(f){'console'in +window&&(console.log(f),console.trace())}}function Y(a,b){var c={sourceCodeHtml:a,langExtension:b};return X(c),c.prettyPrintedHtml}function +Z(c){var d=window._pr_isIE6(),e=d===6?'\r\n':'\r',f=[document.getElementsByTagName('pre'),document.getElementsByTagName('code'),document.getElementsByTagName('xmp')],g=[],h,i,j,k,l,m;for(i=0;i=0){f=e.className.match(/\blang-(\w+)\b/),f&&(f=f[1]),i=false;for(j=e.parentNode;j;j=j.parentNode)if((j.tagName==='pre'||j.tagName==='code'||j.tagName==='xmp')&&j.className&&j.className.indexOf('prettyprint')>=0){i=a;break}i||(d=N(e),d=d.replace(/(?:\r\n?|\n)$/,''),m={sourceCodeHtml:d,langExtension:f,sourceNode:e},X(m),o())}}k=0;)i=j[h],i.parentNode.replaceChild(document.createTextNode(e),i)}}n()}window.PR_normalizedHtml=L,window.prettyPrintOne=Y,window.prettyPrint=Z,window.PR={combinePrefixPatterns:M,createSimpleLexer:S,registerLangHandler:V,sourceDecorator:T,PR_ATTRIB_NAME:'atn',PR_ATTRIB_VALUE:'atv',PR_COMMENT:'com',PR_DECLARATION:'dec',PR_KEYWORD:'kwd',PR_LITERAL:'lit',PR_NOCODE:'nocode',PR_PLAIN:'pln',PR_PUNCTUATION:'pun',PR_SOURCE:'src',PR_STRING:'str',PR_TAG:'tag',PR_TYPE:'typ'}})() \ No newline at end of file diff --git a/dev-tools/prettify/stylesheet+prettify.css b/dev-tools/prettify/stylesheet+prettify.css new file mode 100644 index 00000000000..1ceb0297b49 --- /dev/null +++ b/dev-tools/prettify/stylesheet+prettify.css @@ -0,0 +1,30 @@ +/* Javadoc style 
sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ +.str,.atv{color:#080}.kwd,.tag{color:#008}.com{color:#800}.typ,.atn,.dec{color:#606}.lit{color:#066}.pun{color:#660}.pln{color:#000}pre.prettyprint{padding:2px;border:1px solid #888}@media print{.str{color:#060}.kwd,.tag{color:#006;font-weight:bold}.com{color:#600;font-style:italic}.typ{font-weight:bold}.lit{color:#044}.pun{color:#440}.atn,.typ{color:#404}.atv{color:#060}} + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} + diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 78a75fd5db7..bdd4940c487 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,19 +89,9 @@ Changes in backwards compatibility policy * LUCENE-2484: Removed deprecated TermAttribute. Use CharTermAttribute and TermToBytesRefAttribute instead. (Uwe Schindler) -* LUCENE-2602: The default (LogByteSizeMergePolicy) merge policy now - takes deletions into account by default. You can disable this by - calling setCalibrateSizeByDeletes(false) on the merge policy. (Mike - McCandless) - * LUCENE-2600: Remove IndexReader.isDeleted in favor of IndexReader.getDeletedDocs(). (Mike McCandless) -* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty - values in multi-valued field has been changed for some cases in index. - If you index empty fields and uses positions/offsets information on that - fields, reindex is recommended. (David Smiley, Koji Sekiguchi) - * LUCENE-2667: FuzzyQuery's defaults have changed for more performant behavior: the minimum similarity is 2 edit distances from the word, and the priority queue size is 50. To support this, FuzzyQuery now allows @@ -128,22 +118,43 @@ Changes in backwards compatibility policy ParallelMultiSearcher into IndexSearcher as an optional ExecutorServiced passed to its ctor. (Mike McCandless) +* LUCENE-2831: Changed Weight#scorer, Weight#explain & Filter#getDocIdSet to + operate on a AtomicReaderContext instead of directly on IndexReader to enable + searches to be aware of IndexSearcher's context. (Simon Willnauer) + +* LUCENE-2839: Scorer#score(Collector,int,int) is now public because it is + called from other classes and part of public API. 
(Uwe Schindler) + +* LUCENE-2865: Weight#scorer(AtomicReaderContext, boolean, boolean) now accepts + a ScorerContext struct instead of booleans.(Simon Willnauer) + +* LUCENE-2882: Cut over SpanQuery#getSpans to AtomicReaderContext to enforce + per segment semantics on SpanQuery & Spans. (Simon Willnauer) + +* LUCENE-2236: Similarity can now be configured on a per-field basis. See the + migration notes in MIGRATE.txt for more details. (Robert Muir, Doron Cohen) + Changes in Runtime Behavior -* LUCENE-2650, LUCENE-2825: The behavior of FSDirectory.open has changed. On 64-bit - Windows and Solaris systems that support unmapping, FSDirectory.open returns - MMapDirectory. Additionally the behavior of MMapDirectory has been - changed to enable unmapping by default if supported by the JRE. - (Mike McCandless, Uwe Schindler, Robert Muir) +* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you + omitNorms(true) for field "a" for 1000 documents, but then add a document with + omitNorms(false) for field "a", all documents for field "a" will have no norms. + Previously, Lucene would fill the first 1000 documents with "fake norms" from + Similarity.getDefault(). (Robert Muir, Mike Mccandless) -* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio - to determine whether the passed in segment should be compound. - (Shai Erera, Earwin Burrfoot) +* LUCENE-2846: When some documents contain field "a", and others do not, the + documents that don't have the field get a norm byte value of 0. Previously, Lucene + would populate "fake norms" with Similarity.getDefault() for these documents. + (Robert Muir, Mike Mccandless) + +* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather + than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler) -* LUCENE-2805: IndexWriter now increments the index version on every change to - the index instead of for every commit. Committing or closing the IndexWriter - without any changes to the index will not cause any index version increment. - (Simon Willnauer, Mike McCandless) +* LUCENE-1076: The default merge policy is now able to merge + non-contiguous segments, which means docIDs no longer necessarily + say "in order". If this is a problem then you can use either of the + LogMergePolicy impls, and call setRequireContiguousMerge(true). + (Mike McCandless) API Changes @@ -160,17 +171,6 @@ API Changes deleted docs (getDeletedDocs), providing a new Bits interface to directly query by doc ID. -* LUCENE-2402: IndexWriter.deleteUnusedFiles now deletes unreferenced commit - points too. If you use an IndexDeletionPolicy which holds onto index commits - (such as SnapshotDeletionPolicy), you can call this method to remove those - commit points when they are not needed anymore (instead of waiting for the - next commit). (Shai Erera) - -* LUCENE-2674: A new idfExplain method was added to Similarity, that - accepts an incoming docFreq. If you subclass Similarity, make sure - you also override this method on upgrade. (Robert Muir, Mike - McCandless) - * LUCENE-2691: IndexWriter.getReader() has been made package local and is now exposed via open and reopen methods on IndexReader. The semantics of the call is the same as it was prior to the API change. @@ -178,9 +178,18 @@ API Changes * LUCENE-2566: QueryParser: Unary operators +,-,! will not be treated as operators if they are followed by whitespace. 
(yonik) + +* LUCENE-2831: Weight#scorer, Weight#explain, Filter#getDocIdSet, + Collector#setNextReader & FieldComparator#setNextReader now expect an + AtomicReaderContext instead of an IndexReader. (Simon Willnauer) -* LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override - and return a different RAMFile implementation. (Shai Erera) +* LUCENE-2846: Remove the deprecated IndexReader.setNorm(int, String, float). + This method was only syntactic sugar for setNorm(int, String, byte), but + using the global Similarity.getDefault().encodeNormValue. Use the byte-based + method instead to ensure that the norm is encoded with your Similarity. + Also removed norms(String, byte[], int), which was only used by MultiReader + for building top-level norms. If you really need a top-level norms, use + MultiNorms or SlowMultiReaderWrapper. (Robert Muir, Mike Mccandless) New features @@ -220,10 +229,6 @@ New features data and payloads in 5 separate files instead of the 2 used by standard codec), and int block (really a "base" for using block-based compressors like PForDelta for storing postings data). - -* LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. NoDeletionPolicy - can be used to prevent commits from ever getting deleted from the index. - (Shai Erera) * LUCENE-1458, LUCENE-2111: The in-memory terms index used by standard codec is more RAM efficient: terms data is stored as block byte @@ -238,16 +243,6 @@ New features applications that have many unique terms, since it reduces how often a new segment must be flushed given a fixed RAM buffer size. -* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can - return a DirPayloadProcessor for a given Directory, which returns a - PayloadProcessor for a given Term. The PayloadProcessor will be used to - process the payloads of the segments as they are merged (e.g. if one wants to - rewrite payloads of external indexes as they are added, or of local ones). - (Shai Erera, Michael Busch, Mike McCandless) - -* LUCENE-2440: Add support for custom ExecutorService in - ParallelMultiSearcher (Edward Drapkin via Mike McCandless) - * LUCENE-2489: Added PerFieldCodecWrapper (in oal.index.codecs) which lets you set the Codec per field (Mike McCandless) @@ -258,17 +253,6 @@ New features SegmentInfosReader to allow customization of SegmentInfos data. (Andrzej Bialecki) -* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike - McCandless) - -* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq. Along - with a custom Collector these experimental methods make it possible - to gather the hit-count per sub-clause and per document while a - search is running. (Simon Willnauer, Mike McCandless) - -* LUCENE-2636: Added MultiCollector which allows running the search with several - Collectors. (Shai Erera) - * LUCENE-2504: FieldComparator.setNextReader now returns a FieldComparator instance. You can "return this", to just reuse the same instance, or you can return a comparator optimized to the new @@ -326,19 +310,11 @@ New features terms dict. This impl stores the indexed terms in an FST, which is much more RAM efficient than FixedGapTermsIndex. (Mike McCandless) +* LUCENE-2862: Added TermsEnum.totalTermFreq() and + Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir) + Optimizations -* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. 
- (Mike McCandless) - -* LUCENE-2531: Fix issue when sorting by a String field that was - causing too many fallbacks to compare-by-value (instead of by-ord). - (Mike McCandless) - -* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for - efficient copying by sub-classes. Optimized copy is implemented for RAM and FS - streams. (Shai Erera) - * LUCENE-2588: Don't store unnecessary suffixes when writing the terms index, saving RAM in IndexReader; change default terms index interval from 128 to 32, because the terms index now requires much @@ -348,12 +324,15 @@ Optimizations not seek backwards when a sub-range has no terms. It now only seeks when the current term is less than the next sub-range's lower end. (Uwe Schindler, Mike McCandless) - -Documentation - -* LUCENE-2579: Fix oal.search's package.html description of abstract - methods. (Santiago M. Mola via Mike McCandless) +* LUCENE-2694: Optimize MultiTermQuery to be single pass for Term lookups. + MultiTermQuery now stores TermState per leaf reader during rewrite to re- + seek the term dictionary in TermQuery / TermWeight. + (Simon Willnauer, Mike McCandless, Robert Muir) + +* LUCENE-2897: Apply deleted terms while flushing a segment. We still + buffer deleted terms to later apply to past segments. (Mike McCandless) + Bug fixes * LUCENE-2633: PackedInts Packed32 and Packed64 did not support internal @@ -363,13 +342,6 @@ Bug fixes with more document deletions is requested before a reader with fewer deletions, provided they share some segments. (yonik) -* LUCENE-2802: NRT DirectoryReader returned incorrect values from - getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due - to a mutable reference to the IndexWriters SegmentInfos. - (Simon Willnauer, Earwin Burrfoot) - - - ======================= Lucene 3.x (not yet released) ======================= Changes in backwards compatibility policy @@ -408,9 +380,9 @@ Changes in backwards compatibility policy * LUCENE-2302: The new interface for term attributes, CharTermAttribute, now implements CharSequence. This requires the toString() methods of CharTermAttribute, deprecated TermAttribute, and Token to return only - the term text and no other attribute contents. - TODO: Point to new attribute inspection API coming with LUCENE-2374. - (Uwe Schindler, Robert Muir) + the term text and no other attribute contents. LUCENE-2374 implements + an attribute reflection API to no longer rely on toString() for attribute + inspection. (Uwe Schindler, Robert Muir) * LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed @@ -434,10 +406,37 @@ Changes in backwards compatibility policy * LUCENE-2733: Removed public constructors of utility classes with only static methods to prevent instantiation. (Uwe Schindler) -* LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List - instead of a Collection, guaranteeing the commits are sorted from oldest to - latest. (Shai Erera) +* LUCENE-2602: The default (LogByteSizeMergePolicy) merge policy now + takes deletions into account by default. You can disable this by + calling setCalibrateSizeByDeletes(false) on the merge policy. (Mike + McCandless) + +* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty + values in multi-valued field has been changed for some cases in index. + If you index empty fields and uses positions/offsets information on that + fields, reindex is recommended. 
(David Smiley, Koji Sekiguchi) +* LUCENE-2804: Directory.setLockFactory new declares throwing an IOException. + (Shai Erera, Robert Muir) + +* LUCENE-2837: Added deprecations noting that in 4.0, Searcher and + Searchable are collapsed into IndexSearcher; contrib/remote and + MultiSearcher have been removed. (Mike McCandless) + +* LUCENE-2854: Deprecated SimilarityDelegator and + Similarity.lengthNorm; the latter is now final, forcing any custom + Similarity impls to cutover to the more general computeNorm (Robert + Muir, Mike McCandless) + +* LUCENE-2869: Deprecated Query.getSimilarity: instead of using + "runtime" subclassing/delegation, subclass the Weight instead. + (Robert Muir) + +* LUCENE-2674: A new idfExplain method was added to Similarity, that + accepts an incoming docFreq. If you subclass Similarity, make sure + you also override this method on upgrade. (Robert Muir, Mike + McCandless) + Changes in runtime behavior * LUCENE-1923: Made IndexReader.toString() produce something @@ -453,7 +452,7 @@ Changes in runtime behavior invokes a merge on the incoming and target segments, but instead copies the segments to the target index. You can call maybeMerge or optimize after this method completes, if you need to. - + In addition, Directory.copyTo* were removed in favor of copy which takes the target Directory, source and target files as arguments, and copies the source file to the target Directory under the target file name. (Shai Erera) @@ -465,10 +464,41 @@ Changes in runtime behavior usage, allowing applications to accidentally open two writers on the same directory. (Mike McCandless) -* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now - affect optimize() as well (as opposed to only regular merges). This means that - you can run optimize() and too large segments won't be merged. (Shai Erera) +* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on + LogMergePolicy now affect optimize() as well (as opposed to only regular + merges). This means that you can run optimize() and too large segments won't + be merged. (Shai Erera) +* LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List, + guaranteeing the commits are sorted from oldest to latest. (Shai Erera) + +* LUCENE-2785: TopScoreDocCollector, TopFieldCollector and + the IndexSearcher search methods that take an int nDocs will now + throw IllegalArgumentException if nDocs is 0. Instead, you should + use the newly added TotalHitCountCollector. (Mike McCandless) + +* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio + to determine whether the passed in segment should be compound. + (Shai Erera, Earwin Burrfoot) + +* LUCENE-2805: IndexWriter now increments the index version on every change to + the index instead of for every commit. Committing or closing the IndexWriter + without any changes to the index will not cause any index version increment. + (Simon Willnauer, Mike McCandless) + +* LUCENE-2650, LUCENE-2825: The behavior of FSDirectory.open has changed. On 64-bit + Windows and Solaris systems that support unmapping, FSDirectory.open returns + MMapDirectory. Additionally the behavior of MMapDirectory has been + changed to enable unmapping by default if supported by the JRE. + (Mike McCandless, Uwe Schindler, Robert Muir) + +* LUCENE-2829: Improve the performance of "primary key" lookup use + case (running a TermQuery that matches one document) on a + multi-segment index. 
(Robert Muir, Mike McCandless) + +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) + API Changes * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George @@ -479,7 +509,7 @@ API Changes custom Similarity can alter how norms are encoded, though they must still be encoded as a single byte (Johan Kindgren via Mike McCandless) - + * LUCENE-2103: NoLockFactory should have a private constructor; until Lucene 4.0 the default one will be deprecated. (Shai Erera via Uwe Schindler) @@ -551,17 +581,77 @@ API Changes (such as SnapshotDeletionPolicy), you can call this method to remove those commit points when they are not needed anymore (instead of waiting for the next commit). (Shai Erera) - -* LUCENE-2455: IndexWriter.addIndexesNoOptimize was renamed to addIndexes. - IndexFileNames.segmentFileName now takes another parameter to accommodate - custom file names. You should use this method to name all your files. - (Shai Erera) * LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced with equivalent ones that take a String (id) as argument. You can pass whatever ID you want, as long as you use the same one when calling both. (Shai Erera) +* LUCENE-2356: Add IndexWriterConfig.set/getReaderTermIndexDivisor, to + set what IndexWriter passes for termsIndexDivisor to the readers it + opens internally when apply deletions or creating a near-real-time + reader. (Earwin Burrfoot via Mike McCandless) + +* LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer + in common/standard/ now implement the Word Break rules from the Unicode 6.0.0 + Text Segmentation algorithm (UAX#29), covering the full range of Unicode code + points, including values from U+FFFF to U+10FFFF + + ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/ + Analyzer implementation and behavior. Only the Unicode Basic Multilingual + Plane (code points from U+0000 to U+FFFF) is covered. + + UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the + relevant RFCs, in addition to implementing the UAX#29 Word Break rules. + (Steven Rowe, Robert Muir, Uwe Schindler) + +* LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override + and return a different RAMFile implementation. (Shai Erera) + +* LUCENE-2785: Added TotalHitCountCollector whose sole purpose is to + count the number of hits matching the query. (Mike McCandless) + +* LUCENE-2846: Deprecated IndexReader.setNorm(int, String, float). This method + is only syntactic sugar for setNorm(int, String, byte), but using the global + Similarity.getDefault().encodeNormValue(). Use the byte-based method instead + to ensure that the norm is encoded with your Similarity. + (Robert Muir, Mike McCandless) + +* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the + contents of AttributeImpl and AttributeSource using a well-defined API. + This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes + in a structured way. + There are also some backwards incompatible changes in toString() output, + as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute + leading to changed toString() return values. The new API allows to get a + string representation in a well-defined way using a new method + reflectAsString(). 
For backwards compatibility reasons, when toString() + was implemented by implementation subclasses, the default implementation of + AttributeImpl.reflectWith() uses toString()s output instead to report the + Attribute's properties. Otherwise, reflectWith() uses Java's reflection + (like toString() did before) to get the attribute properties. + In addition, the mandatory equals() and hashCode() are no longer required + for AttributeImpls, but can still be provided (if needed). + (Uwe Schindler) + +* LUCENE-2876: Deprecated Scorer.getSimilarity(). If your Scorer uses a Similarity, + it should keep it itself. Fixed Scorers to pass their parent Weight, so that + Scorer.visitSubScorers (LUCENE-2590) will work correctly. + (Robert Muir, Doron Cohen) + +* LUCENE-2900: When opening a near-real-time (NRT) reader + (IndexReader.re/open(IndexWriter)) you can now specify whether + deletes should be applied. Applying deletes can be costly, and some + expert use cases can handle seeing deleted documents returned. The + deletes remain buffered so that the next time you open an NRT reader + and pass true, all deletes will be a applied. (Mike McCandless) + +* LUCENE-1253: LengthFilter (and Solr's KeepWordTokenFilter) now + require up front specification of enablePositionIncrement. Together with + StopFilter they have a common base class (FilteringTokenFilter) that handles + the position increments automatically. Implementors only need to override an + accept() method that filters tokens. (Uwe Schindler, Robert Muir) + Bug fixes * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on @@ -582,10 +672,6 @@ Bug fixes a prior (corrupt) index missing its segments_N file. (Mike McCandless) -* LUCENE-2534: fix over-sharing bug in - MultiTermsEnum.docs/AndPositionsEnum. (Robert Muir, Mike - McCandless) - * LUCENE-2458: QueryParser no longer automatically forms phrase queries, assuming whitespace tokenization. Previously all CJK queries, for example, would be turned into phrase queries. The old behavior is preserved with @@ -604,7 +690,41 @@ Bug fixes can cause the same document to score to differently depending on what segment it resides in. (yonik) -* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll) +* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll) + +* LUCENE-2732: Fix charset problems in XML loading in + HyphenationCompoundWordTokenFilter. (Uwe Schindler) + +* LUCENE-2802: NRT DirectoryReader returned incorrect values from + getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due + to a mutable reference to the IndexWriters SegmentInfos. + (Simon Willnauer, Earwin Burrfoot) + +* LUCENE-2852: Fixed corner case in RAMInputStream that would hit a + false EOF after seeking to EOF then seeking back to same block you + were just in and then calling readBytes (Robert Muir, Mike McCandless) + +* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it + decides whether to return the cached computed size or not. (Shai Erera) + +* LUCENE-2584: SegmentInfo.files() could hit ConcurrentModificationException if + called by multiple threads. (Alexander Kanarsky via Shai Erera) + +* LUCENE-2809: Fixed IndexWriter.numDocs to take into account + applied but not yet flushed deletes. (Mike McCandless) + +* LUCENE-2879: MultiPhraseQuery previously calculated its phrase IDF by summing + internally, it now calls Similarity.idfExplain(Collection, IndexSearcher). 
+ (Robert Muir) + +* LUCENE-2693: RAM used by IndexWriter was slightly incorrectly computed. + (Jason Rutherglen via Shai Erera) + +* LUCENE-1846: DateTools now uses the US locale everywhere, so DateTools.round() + is safe also in strange locales. (Uwe Schindler) + +* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor, + which can be used to prevent loading the terms index into memory. (Shai Erera) New features @@ -630,8 +750,8 @@ New features stopwords, and implement many analyzers in contrib with it. (Simon Willnauer via Robert Muir) -* LUCENE-2198: Support protected words in stemming TokenFilters using a - new KeywordAttribute. (Simon Willnauer via Uwe Schindler) +* LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a + new KeywordAttribute. (Simon Willnauer, Drew Farris via Uwe Schindler) * LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support to CharTokenizer and its subclasses. CharTokenizer now has new @@ -677,6 +797,16 @@ New features can be used to prevent commits from ever getting deleted from the index. (Shai Erera) +* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can + return a DirPayloadProcessor for a given Directory, which returns a + PayloadProcessor for a given Term. The PayloadProcessor will be used to + process the payloads of the segments as they are merged (e.g. if one wants to + rewrite payloads of external indexes as they are added, or of local ones). + (Shai Erera, Michael Busch, Mike McCandless) + +* LUCENE-2440: Add support for custom ExecutorService in + ParallelMultiSearcher (Edward Drapkin via Mike McCandless) + * LUCENE-2295: Added a LimitTokenCountAnalyzer / LimitTokenCountFilter to wrap any other Analyzer and provide the same functionality as MaxFieldLength provided on IndexWriter. This patch also fixes a bug @@ -684,9 +814,17 @@ New features * LUCENE-2526: Don't throw NPE from MultiPhraseQuery.toString when it's empty. (Ross Woolf via Mike McCandless) + +* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike + McCandless) -* LUCENE-2671: Add SortField.setMissingValue( v ) to enable sorting - behavior for documents that do not include the given field. (ryan) +* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq. Along + with a custom Collector these experimental methods make it possible + to gather the hit-count per sub-clause and per document while a + search is running. (Simon Willnauer, Mike McCandless) + +* LUCENE-2636: Added MultiCollector which allows running the search with several + Collectors. (Shai Erera) * LUCENE-2754, LUCENE-2757: Added a wrapper around MultiTermQueries to add span support: SpanMultiTermQueryWrapper. @@ -698,9 +836,21 @@ New features is no longer needed and discouraged for that use case. Directly wrapping Query improves performance, as out-of-order collection is now supported. (Uwe Schindler) + +* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to + FieldInvertState so that it can be used in Similarity.computeNorm. + (Robert Muir) + +* LUCENE-2474: Added expert ReaderFinishedListener API to + IndexReader, to allow apps that maintain external per-segment caches + to evict entries when a segment is finished. (Shay Banon, Yonik + Seeley, Mike McCandless) Optimizations +* LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of + simple polling for results. 
(Edward Drapkin, Simon Willnauer) + * LUCENE-2075: Terms dict cache is now shared across threads instead of being stored separately in thread local storage. Also fixed terms dict so that the cache is used when seeking the thread local @@ -763,6 +913,17 @@ Optimizations (getStrings, getStringIndex), consume quite a bit less RAM in most cases. (Mike McCandless) +* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. + (Mike McCandless) + +* LUCENE-2531: Fix issue when sorting by a String field that was + causing too many fallbacks to compare-by-value (instead of by-ord). + (Mike McCandless) + +* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for + efficient copying by sub-classes. Optimized copy is implemented for RAM and FS + streams. (Shai Erera) + * LUCENE-2719: Improved TermsHashPerField's sorting to use a better quick sort algorithm that dereferences the pivot element not on every compare call. Also replaced lots of sorting code in Lucene @@ -777,6 +938,16 @@ Optimizations DirectoryReaders or other MultiReaders. This saves lots of memory during merge of norms. (Uwe Schindler, Mike McCandless) +* LUCENE-2824: Optimize BufferedIndexInput to do less bounds checks. + (Robert Muir) + +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) + +* LUCENE-1472: Removed synchronization from static DateTools methods + by using a ThreadLocal. Also converted DateTools.Resolution to a + Java 5 enum (this should not break backwards). (Uwe Schindler) + Build * LUCENE-2124: Moved the JDK-based collation support from contrib/collation @@ -804,6 +975,13 @@ Build "ant idea". See http://wiki.apache.org/lucene-java/HowtoConfigureIntelliJ (Steven Rowe) +* LUCENE-2657: Switch from using Maven POM templates to full POMs when + generating Maven artifacts (Steven Rowe) + +* LUCENE-2609: Added jar-test-framework Ant target which packages Lucene's + tests' framework classes. (Drew Farris, Grant Ingersoll, Shai Erera, Steven + Rowe) + Test Cases * LUCENE-2037 Allow Junit4 tests in our environment (Erick Erickson @@ -842,6 +1020,21 @@ Test Cases as Eclipse and IntelliJ. (Paolo Castagna, Steven Rowe via Robert Muir) +* LUCENE-2804: add newFSDirectory to LuceneTestCase to create a FSDirectory at + random. (Shai Erera, Robert Muir) + +Documentation + +* LUCENE-2579: Fix oal.search's package.html description of abstract + methods. (Santiago M. Mola via Mike McCandless) + +* LUCENE-2625: Add a note to IndexReader.termDocs() with additional verbiage + that the TermEnum must be seeked since it is unpositioned. + (Adriano Crestani via Robert Muir) + +* LUCENE-2894: Use google-code-prettify for syntax highlighting in javadoc. + (Koji Sekiguchi) + ================== Release 2.9.4 / 3.0.3 2010-12-03 ==================== Changes in runtime behavior diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index c55dd8111d1..c5e85969e03 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -325,6 +325,15 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing global AttributeSource to each segments TermsEnum. The TermsEnum is consumer and gets the current minimum competitive boosts (MTQ.MaxNonCompetitiveBoostAttribute). -* LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong - are final. If you subclassed this code before to encode variable-length - integers in some specialized way, use the Codec API instead. +* LUCENE-2374: The backwards layer in AttributeImpl was removed. 
To support correct + reflection of AttributeImpl instances, where the reflection was done using deprecated + toString() parsing, you have to now override reflectWith() to customize output. + toString() is no longer implemented by AttributeImpl, so if you have overridden + toString(), port your customization over to reflectWith(). reflectAsString() would + then return what toString() did before. + +* LUCENE-2236: DefaultSimilarity can no longer be set statically (and dangerously) for the entire JVM. + Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider. + Similarity can now be configured on a per-field basis. + Similarity retains only the field-specific relevance methods such as tf() and idf(). + Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider. diff --git a/lucene/NOTICE.txt b/lucene/NOTICE.txt index faeb75525b8..46f1322b35d 100644 --- a/lucene/NOTICE.txt +++ b/lucene/NOTICE.txt @@ -27,3 +27,6 @@ The class org.apache.lucene.SorterTemplate was inspired by CGLIB's class with the same name. The implementation part is mainly done using pre-existing Lucene sorting code. In-place stable mergesort was borrowed from CGLIB, which is Apache-licensed. + +The Google Code Prettify is Apache License 2.0. +See http://code.google.com/p/google-code-prettify/ diff --git a/lucene/build.xml b/lucene/build.xml index fddb875c9e9..4cb7a82a532 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -17,7 +17,8 @@ limitations under the License. --> - + @@ -32,12 +33,13 @@ + - + @@ -48,7 +50,7 @@ excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/" /> + depends="javadocs-all, javadocs-core, javadocs-contrib, javadocs-test-framework"> ${Name} ${version} Javadoc Index @@ -217,7 +219,6 @@ - @@ -260,7 +261,7 @@ - + @@ -390,19 +391,38 @@ - + - - + + + + + + - - + + + + + + + + + + + + + @@ -600,20 +620,30 @@ - - - - - - + + - - - - - + + + + + + + + + + + + + + + diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 8223f3ffbc5..80dbcce6368 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -25,6 +25,9 @@ + + + @@ -110,6 +113,7 @@ + @@ -318,32 +322,22 @@
- - - + - - - - - - - - + + - + - - @@ -370,6 +364,8 @@ + + @@ -379,12 +375,13 @@ - + + manifest="${manifest.file}" + excludes="@{excludes}"> @@ -397,7 +394,12 @@ - + + + + + @@ -560,6 +562,9 @@ + + + @@ -590,6 +595,9 @@ + + + @@ -648,6 +656,23 @@ + + + + + + + + + + + + + + + + @@ -703,6 +728,9 @@ + + + +
+ + ]]>
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 24eed06507a..7f2fe40dffc 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -4,53 +4,39 @@ Lucene contrib change Log Build - * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo. - (Robert Muir) - * LUCENE-2845: Moved contrib/benchmark to modules. New Features - * LUCENE-2604: Added RegexpQuery support to contrib/queryparser. - (Simon Willnauer, Robert Muir) + * LUCENE-2604: Added RegexpQuery support to contrib/queryparser. + (Simon Willnauer, Robert Muir) - * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific - Directory impl that uses the O_DIRECT flag to bypass the buffer - cache. This is useful to prevent segment merging from evicting - pages from the buffer cache, since fadvise/madvise do not seem. - (Michael McCandless) + * LUCENE-2373: Added a Codec implementation that works with append-only + filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading + code is refactored to support append-only FS, and to allow for future + customization of per-segment information. (Andrzej Bialecki) - * LUCENE-2373: Added a Codec implementation that works with append-only - filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading - code is refactored to support append-only FS, and to allow for future - customization of per-segment information. (Andrzej Bialecki) + * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along + with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll) - * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along - with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll) - - * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker. The per class - method is also still available. (Grant Ingersoll) + * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker. The per class + method is also still available. (Grant Ingersoll) - * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly - from the term dictionary using levenshtein automata. (Robert Muir) + * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly + from the term dictionary using levenshtein automata. (Robert Muir) - * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl - that doesn't synchronize on the file handle. This can be useful to - avoid the performance problems of SimpleFSDirectory and NIOFSDirectory. - (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless) - - * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries - using the FieldCache's TermsEnum. (Robert Muir) + * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries + using the FieldCache's TermsEnum. (Robert Muir) API Changes - * LUCENE-2606: Changed RegexCapabilities interface to fix thread - safety, serialization, and performance problems. If you have - written a custom RegexCapabilities it will need to be updated - to the new API. (Robert Muir, Uwe Schindler) + * LUCENE-2606: Changed RegexCapabilities interface to fix thread + safety, serialization, and performance problems. If you have + written a custom RegexCapabilities it will need to be updated + to the new API. 
(Robert Muir, Uwe Schindler) - * LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful - for API use. (Andrzej Bialecki) + * LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful + for API use. (Andrzej Bialecki) ======================= Lucene 3.x (not yet released) ======================= @@ -100,6 +86,11 @@ Changes in runtime behavior Bug fixes + * LUCENE-2855: contrib queryparser was using CharSequence as key in some internal + Map instances, which was leading to incorrect behaviour, since some CharSequence + implementors do not override hashcode and equals methods. Now the internal Maps + are using String instead. (Adriano Crestani) + * LUCENE-2068: Fixed ReverseStringFilter which was not aware of supplementary characters. During reverse the filter created unpaired surrogates, which will be replaced by U+FFFD by the indexer, but not at query time. The filter @@ -117,41 +108,48 @@ Bug fixes default. (Robert Muir, Uwe Schindler, Simon Willnauer) * LUCENE-2184: Fixed bug with handling best fit value when the proper best fit value is - not an indexed field. Note, this change affects the APIs. (Grant Ingersoll) + not an indexed field. Note, this change affects the APIs. (Grant Ingersoll) * LUCENE-2359: Fix bug in CartesianPolyFilterBuilder related to handling of behavior around - the 180th meridian (Grant Ingersoll) + the 180th meridian (Grant Ingersoll) * LUCENE-2404: Fix bugs with position increment and empty tokens in ThaiWordFilter. For matchVersion >= 3.1 the filter also no longer lowercases. ThaiAnalyzer will use a separate LowerCaseFilter instead. (Uwe Schindler, Robert Muir) -* LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus - permissions to newly created files, and to not silently hardwire - buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless) + * LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus + permissions to newly created files, and to not silently hardwire + buffer size to 1 MB. (Mark Miller, Robert Muir, Mike McCandless) -* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows - you to customize its normalization/folding, by editing the source data files in src/data - and regenerating a new .nrm with 'ant gennorm2'. (David Bowen via Robert Muir) + * LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows + you to customize its normalization/folding, by editing the source data files in src/data + and regenerating a new .nrm with 'ant gennorm2'. (David Bowen via Robert Muir) -* LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not - always the case. If the dictionary is unavailable, the filter will now throw - UnsupportedOperationException in the constructor. (Robert Muir) + * LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not + always the case. If the dictionary is unavailable, the filter will now throw + UnsupportedOperationException in the constructor. (Robert Muir) -* LUCENE-589: Fix contrib/demo for international documents. - (Curtis d'Entremont via Robert Muir) + * LUCENE-589: Fix contrib/demo for international documents. + (Curtis d'Entremont via Robert Muir) -* LUCENE-2246: Fix contrib/demo for Turkish html documents. - (Selim Nadi via Robert Muir) + * LUCENE-2246: Fix contrib/demo for Turkish html documents. 
+ (Selim Nadi via Robert Muir) -* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading - (Curtis d'Entremont via Robert Muir) + * LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading + (Curtis d'Entremont via Robert Muir) -* LUCENE-591: The demo indexer now indexes meta keywords. - (Curtis d'Entremont via Robert Muir) + * LUCENE-591: The demo indexer now indexes meta keywords. + (Curtis d'Entremont via Robert Muir) + + * LUCENE-2874: Highlighting overlapping tokens outputted doubled words. + (Pierre Gossé via Robert Muir) API Changes + * LUCENE-2867: Some contrib queryparser methods that receives CharSequence as + identifier, such as QueryNode#unsetTag(CharSequence), were deprecated and + will be removed on version 4. (Adriano Crestani) + * LUCENE-2147: Spatial GeoHashUtils now always decode GeoHash strings with full precision. GeoHash#decode_exactly(String) was merged into GeoHash#decode(String). (Chris Male, Simon Willnauer) @@ -192,6 +190,12 @@ API Changes New features + * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific + Directory impl that uses the O_DIRECT flag to bypass the buffer + cache. This is useful to prevent segment merging from evicting + pages from the buffer cache, since fadvise/madvise do not seem. + (Michael McCandless) + * LUCENE-2306: Add NumericRangeFilter and NumericRangeQuery support to XMLQueryParser. (Jingkei Ly, via Mark Harwood) @@ -281,6 +285,14 @@ New features BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor. (Adriano Crestani via Robert Muir) + * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl + that doesn't synchronize on the file handle. This can be useful to + avoid the performance problems of SimpleFSDirectory and NIOFSDirectory. + (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless) + + * LUCENE-2842: Add analyzer for Galician. Also adds the RSLP (Orengo) stemmer + for Portuguese. (Robert Muir) + Build * LUCENE-2124: Moved the JDK-based collation support from contrib/collation @@ -302,7 +314,10 @@ Build (Robert Muir) * LUCENE-2833: Upgrade contrib/ant's jtidy jar file to r938 (Robert Muir) - + + * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo. 
+ (Robert Muir) + Optimizations * LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer diff --git a/lucene/contrib/ant/pom.xml.template b/lucene/contrib/ant/pom.xml.template deleted file mode 100644 index 1bcf3e113e6..00000000000 --- a/lucene/contrib/ant/pom.xml.template +++ /dev/null @@ -1,48 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-ant - jar - @version@ - Lucene Contrib Ant - Ant task to create Lucene indexes - - - jtidy - jtidy - 4aug2000r7-dev - - - ant - ant - 1.6.5 - - - diff --git a/lucene/contrib/contrib-build.xml b/lucene/contrib/contrib-build.xml index 802d307eb6a..bbd35082ac2 100644 --- a/lucene/contrib/contrib-build.xml +++ b/lucene/contrib/contrib-build.xml @@ -40,6 +40,7 @@ + @@ -50,7 +51,6 @@ - @@ -71,7 +71,7 @@ diff --git a/lucene/contrib/db/bdb-je/build.xml b/lucene/contrib/db/bdb-je/build.xml index b751d84287d..cc8c1c8d96b 100644 --- a/lucene/contrib/db/bdb-je/build.xml +++ b/lucene/contrib/db/bdb-je/build.xml @@ -39,13 +39,6 @@ - - - - - - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-bdb-je - jar - @version@ - lucene-contrib-bdb-je - Berkeley DB based Directory implementation - - - sleepycat - je - ${sleepycat-je-version} - - - diff --git a/lucene/contrib/db/bdb/build.xml b/lucene/contrib/db/bdb/build.xml index 39d22d05885..4f7c74207f0 100644 --- a/lucene/contrib/db/bdb/build.xml +++ b/lucene/contrib/db/bdb/build.xml @@ -39,13 +39,6 @@ - - - - - - - - + Lucene Demo @@ -75,4 +75,23 @@ outputDir="src/java/org/apache/lucene/demo/html" /> + + + + + + + + + + + + + + + + diff --git a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java index 07c18cbe3fd..75090df931a 100644 --- a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java +++ b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java @@ -84,8 +84,7 @@ public class IndexHTML { } writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig( Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT)) - .setMaxFieldLength(1000000).setOpenMode( - create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND)); + .setOpenMode(create ? 
OpenMode.CREATE : OpenMode.CREATE_OR_APPEND)); indexDocs(root, index, create); // add new docs System.out.println("Optimizing index..."); diff --git a/lucene/contrib/demo/src/java/org/apache/lucene/demo/SearchFiles.java b/lucene/contrib/demo/src/java/org/apache/lucene/demo/SearchFiles.java index 422e23497d7..6a300459995 100644 --- a/lucene/contrib/demo/src/java/org/apache/lucene/demo/SearchFiles.java +++ b/lucene/contrib/demo/src/java/org/apache/lucene/demo/SearchFiles.java @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; @@ -160,9 +161,9 @@ public class SearchFiles { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = docBase; + this.docBase = context.docBase; } @Override diff --git a/lucene/contrib/highlighter/pom.xml.template b/lucene/contrib/highlighter/pom.xml.template deleted file mode 100644 index 08421ee8eed..00000000000 --- a/lucene/contrib/highlighter/pom.xml.template +++ /dev/null @@ -1,45 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-highlighter - Lucene Highlighter - @version@ - - This is the highlighter for apache lucene java - - jar - - - org.apache.lucene - lucene-memory - @version@ - - - diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java index a6b2840ada4..616d9e26670 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java @@ -233,7 +233,10 @@ public class TokenSources { .size()]); ArrayUtil.quickSort(tokensInOriginalOrder, new Comparator() { public int compare(Token t1, Token t2) { - return t1.startOffset() - t2.endOffset(); + if (t1.startOffset() == t2.startOffset()) + return t1.endOffset() - t2.endOffset(); + else + return t1.startOffset() - t2.startOffset(); } }); } diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index e759b1e4b2c..471c29ee070 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; @@ -50,7 +51,7 @@ public class WeightedSpanTermExtractor { private String fieldName; private TokenStream tokenStream; - private Map readers = new HashMap(10); + private Map readers = new HashMap(10); private String defaultField; 
private boolean expandMultiTermQuery; private boolean cachedTokenStream; @@ -66,11 +67,11 @@ public class WeightedSpanTermExtractor { } private void closeReaders() { - Collection readerSet = readers.values(); + Collection ctxSet = readers.values(); - for (final IndexReader reader : readerSet) { + for (final AtomicReaderContext ctx : ctxSet) { try { - reader.close(); + ctx.reader.close(); } catch (IOException e) { // alert? } @@ -149,7 +150,7 @@ public class WeightedSpanTermExtractor { query = mtq; } if (mtq.getField() != null) { - IndexReader ir = getReaderForField(mtq.getField()); + IndexReader ir = getLeafContextForField(mtq.getField()).reader; extract(query.rewrite(ir), terms); } } else if (query instanceof MultiPhraseQuery) { @@ -234,7 +235,7 @@ public class WeightedSpanTermExtractor { final boolean mustRewriteQuery = mustRewriteQuery(spanQuery); if (mustRewriteQuery) { for (final String field : fieldNames) { - final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field)); + final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader); queries.put(field, rewrittenQuery); rewrittenQuery.extractTerms(nonWeightedTerms); } @@ -246,12 +247,12 @@ public class WeightedSpanTermExtractor { for (final String field : fieldNames) { - IndexReader reader = getReaderForField(field); + AtomicReaderContext context = getLeafContextForField(field); final Spans spans; if (mustRewriteQuery) { - spans = queries.get(field).getSpans(reader); + spans = queries.get(field).getSpans(context); } else { - spans = spanQuery.getSpans(reader); + spans = spanQuery.getSpans(context); } @@ -317,22 +318,23 @@ public class WeightedSpanTermExtractor { return rv; } - private IndexReader getReaderForField(String field) throws IOException { + private AtomicReaderContext getLeafContextForField(String field) throws IOException { if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) { tokenStream = new CachingTokenFilter(tokenStream); cachedTokenStream = true; } - IndexReader reader = readers.get(field); - if (reader == null) { + AtomicReaderContext context = readers.get(field); + if (context == null) { MemoryIndex indexer = new MemoryIndex(); indexer.addField(field, tokenStream); tokenStream.reset(); IndexSearcher searcher = indexer.createSearcher(); - reader = searcher.getIndexReader(); - readers.put(field, reader); + // MEM index has only atomic ctx + context = (AtomicReaderContext) searcher.getTopReaderContext(); + readers.put(field, context); } - return reader; + return context; } /** diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html index ee023abfa70..a71dfb3040d 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html @@ -66,7 +66,7 @@ sample text.

Step 1.

In Step 1, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap} from the user query. QueryPhraseMap consists of the following members:

-
+
 public class QueryPhraseMap {
   boolean terminal;
   int slop;   // valid if terminal == true and phraseHighlight == true
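
A note on how this structure is reached from application code: the sketch below assumes the org.apache.lucene.search.vectorhighlight API as of this snapshot, and the field name "content", the doc id and the fragment size are placeholders, not part of this patch.

  // Step 1 happens inside getFieldQuery(): the user query is translated into a
  // per-field QueryPhraseMap, which the highlighter then consults for each hit.
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  FieldQuery fieldQuery = highlighter.getFieldQuery(query);
  String fragment = highlighter.getBestFragment(fieldQuery, reader, docId, "content", 100);
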
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
index 31752349dd2..755d9f5d4ec 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
@@ -36,6 +36,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
@@ -69,7 +70,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
     final IndexReader indexReader = IndexReader.open(directory, true);
     try {
       assertEquals(1, indexReader.numDocs());
-      final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
       try {
         final PhraseQuery phraseQuery = new PhraseQuery();
         phraseQuery.add(new Term(FIELD, "fox"));
@@ -113,7 +114,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
     final IndexReader indexReader = IndexReader.open(directory, true);
     try {
       assertEquals(1, indexReader.numDocs());
-      final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
       try {
         final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
             new SpanTermQuery(new Term(FIELD, "fox")),
@@ -133,9 +134,9 @@ public class HighlighterPhraseTest extends LuceneTestCase {
           }
 
           @Override
-          public void setNextReader(IndexReader indexreader, int i)
+          public void setNextReader(AtomicReaderContext context)
               throws IOException {
-            this.baseDoc = i;
+            this.baseDoc = context.docBase;
           }
 
           @Override
@@ -183,7 +184,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
     final IndexReader indexReader = IndexReader.open(directory, true);
     try {
       assertEquals(1, indexReader.numDocs());
-      final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
       try {
         final PhraseQuery phraseQuery = new PhraseQuery();
         phraseQuery.add(new Term(FIELD, "did"));
@@ -226,7 +227,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
     final IndexReader indexReader = IndexReader.open(directory, true);
     try {
       assertEquals(1, indexReader.numDocs());
-      final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
       try {
         final PhraseQuery phraseQuery = new PhraseQuery();
         phraseQuery.add(new Term(FIELD, "did"));
@@ -267,7 +268,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
     final IndexReader indexReader = IndexReader.open(directory, true);
     try {
       assertEquals(1, indexReader.numDocs());
-      final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
       try {
         final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
             new SpanTermQuery(new Term(FIELD, "did")),
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
new file mode 100644
index 00000000000..572aa219b78
--- /dev/null
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
@@ -0,0 +1,191 @@
+package org.apache.lucene.search.highlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.LuceneTestCase;
+
+// LUCENE-2874
+public class TokenSourcesTest extends LuceneTestCase {
+  private static final String FIELD = "text";
+
+  private static final class OverlapAnalyzer extends Analyzer {
+
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new TokenStreamOverlap();
+    }
+  }
+
+  private static final class TokenStreamOverlap extends TokenStream {
+    private Token[] tokens;
+
+    private int i = -1;
+
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+    private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
+
+    public TokenStreamOverlap() {
+      reset();
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      this.i++;
+      if (this.i >= this.tokens.length) {
+        return false;
+      }
+      clearAttributes();
+      termAttribute.setEmpty().append(this.tokens[i]);
+      offsetAttribute.setOffset(this.tokens[i].startOffset(),
+          this.tokens[i].endOffset());
+      positionIncrementAttribute.setPositionIncrement(this.tokens[i]
+          .getPositionIncrement());
+      return true;
+    }
+
+    @Override
+    public void reset() {
+      this.i = -1;
+      this.tokens = new Token[] {
+          new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
+          new Token(new char[] { '{', 'f', 'o', 'x', '}' }, 0, 5, 0, 7),
+          new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
+          new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
+          new Token(new char[] { 'n', 'o', 't' }, 0, 3, 12, 15),
+          new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) };
+      this.tokens[1].setPositionIncrement(0);
+    }
+  }
+
+  public void testOverlapWithOffset() throws CorruptIndexException,
+      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+    final String TEXT = "the fox did not jump";
+    final Directory directory = newDirectory();
+    final IndexWriter indexWriter = new IndexWriter(directory,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+    try {
+      final Document document = new Document();
+      document.add(new Field(FIELD, new TokenStreamOverlap(),
+          TermVector.WITH_OFFSETS));
+      indexWriter.addDocument(document);
+    } finally {
+      indexWriter.close();
+    }
+    final IndexReader indexReader = IndexReader.open(directory, true);
+    try {
+      assertEquals(1, indexReader.numDocs());
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
+      try {
+        final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
+        query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
+        query.add(new SpanTermQuery(new Term(FIELD, "fox")));
+        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
+        // new SpanTermQuery(new Term(FIELD, "{fox}")),
+        // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
+
+        TopDocs hits = indexSearcher.search(query, 1);
+        assertEquals(1, hits.totalHits);
+        final Highlighter highlighter = new Highlighter(
+            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
+            new QueryScorer(query));
+        final TokenStream tokenStream = TokenSources
+            .getTokenStream(
+                (TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
+                false);
+        assertEquals("the fox did not jump",
+            highlighter.getBestFragment(tokenStream, TEXT));
+      } finally {
+        indexSearcher.close();
+      }
+    } finally {
+      indexReader.close();
+      directory.close();
+    }
+  }
+
+  public void testOverlapWithPositionsAndOffset() throws CorruptIndexException,
+      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+    final String TEXT = "the fox did not jump";
+    final Directory directory = newDirectory();
+    final IndexWriter indexWriter = new IndexWriter(directory,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+    try {
+      final Document document = new Document();
+      document.add(new Field(FIELD, new TokenStreamOverlap(),
+          TermVector.WITH_POSITIONS_OFFSETS));
+      indexWriter.addDocument(document);
+    } finally {
+      indexWriter.close();
+    }
+    final IndexReader indexReader = IndexReader.open(directory, true);
+    try {
+      assertEquals(1, indexReader.numDocs());
+      final IndexSearcher indexSearcher = newSearcher(indexReader);
+      try {
+        final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
+        query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
+        query.add(new SpanTermQuery(new Term(FIELD, "fox")));
+        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
+        // new SpanTermQuery(new Term(FIELD, "{fox}")),
+        // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
+
+        TopDocs hits = indexSearcher.search(query, 1);
+        assertEquals(1, hits.totalHits);
+        final Highlighter highlighter = new Highlighter(
+            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
+            new QueryScorer(query));
+        final TokenStream tokenStream = TokenSources
+            .getTokenStream(
+                (TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
+                false);
+        assertEquals("the fox did not jump",
+            highlighter.getBestFragment(tokenStream, TEXT));
+      } finally {
+        indexSearcher.close();
+      }
+    } finally {
+      indexReader.close();
+      directory.close();
+    }
+  }
+
+}
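
Outside the test fixture, the path exercised above is: rebuild a TokenStream from the stored term vector, then let the Highlighter walk it. Sketch, with FIELD and TEXT standing in for an application's own field name and stored text:

  Highlighter highlighter = new Highlighter(
      new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
  TokenStream tokenStream = TokenSources.getTokenStream(
      (TermPositionVector) reader.getTermFreqVector(docId, FIELD), false);
  String fragment = highlighter.getBestFragment(tokenStream, TEXT);
  // Before the LUCENE-2874 fix in TokenSources, overlapping tokens (position
  // increment 0) could come back mis-ordered and produce doubled words here.
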
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java
index 816e454673e..e4eea034bd1 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java
@@ -82,14 +82,17 @@ public class InstantiatedDocsAndPositionsEnum extends DocsAndPositionsEnum {
     return currentDoc.getTermPositions().length;
   }
   
+  @Override
   public int nextPosition() {
     return currentDoc.getTermPositions()[++posUpto];
   }
 
+  @Override
   public boolean hasPayload() {
     return currentDoc.getPayloads()[posUpto] != null;
   }
 
+  @Override
   public BytesRef getPayload() {
     payload.bytes = currentDoc.getPayloads()[posUpto];
     payload.length = payload.bytes.length;
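
These overrides implement the usual per-position iteration contract; consuming code generally looks like the sketch below, where delDocs is a placeholder for the reader's deleted-docs Bits and may be null when there are no deletions:

  DocsAndPositionsEnum postings = termsEnum.docsAndPositions(delDocs, null);
  while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
    int freq = postings.freq();
    for (int i = 0; i < freq; i++) {
      int position = postings.nextPosition();
      if (postings.hasPayload()) {
        BytesRef payload = postings.getPayload();  // only valid until the next call
      }
    }
  }
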
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
index 179d9c42c80..76faa4602bd 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
@@ -238,6 +238,10 @@ public class InstantiatedIndex
           while((text = termsEnum.next()) != null) {
             String termText = text.utf8ToString();
             InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+            final long totalTermFreq = termsEnum.totalTermFreq();
+            if (totalTermFreq != -1) {
+              instantiatedTerm.addPositionsCount(totalTermFreq);
+            }
             getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
             instantiatedTerm.setTermIndex(terms.size());
             terms.add(instantiatedTerm);
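
totalTermFreq() returns -1 when the statistic is unavailable, hence the guard above; callers are expected to check the same sentinel. Sketch, where totalOccurrences is a hypothetical accumulator:

  long ttf = termsEnum.totalTermFreq();
  if (ttf != -1) {
    totalOccurrences += ttf;                 // codec/reader tracks total occurrences
  } else {
    totalOccurrences += termsEnum.docFreq(); // fall back to document frequency
  }
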
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
index 96d9198146d..742e101ce93 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.store.instantiated;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -32,6 +33,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.BytesRef;
@@ -46,10 +48,13 @@ import org.apache.lucene.util.Bits;
 public class InstantiatedIndexReader extends IndexReader {
 
   private final InstantiatedIndex index;
+  private ReaderContext context = new AtomicReaderContext(this);
+
 
   public InstantiatedIndexReader(InstantiatedIndex index) {
     super();
     this.index = index;
+    readerFinishedListeners = Collections.synchronizedSet(new HashSet());
   }
 
   /**
@@ -331,15 +336,6 @@ public class InstantiatedIndexReader extends IndexReader {
     return norms;
   }
 
-  @Override
-  public void norms(String field, byte[] bytes, int offset) throws IOException {
-    byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
-    if (norms == null) {
-      return;
-    }
-    System.arraycopy(norms, 0, bytes, offset, norms.length);
-  }
-
   @Override
   protected void doSetNorm(int doc, String field, byte value) throws IOException {
     if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
@@ -410,18 +406,33 @@ public class InstantiatedIndexReader extends IndexReader {
         if (i < 0) {
           i = -i - 1;
         }
-        if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) {
+        if (i >= orderedTerms.length || orderedTerms[i].field() != field) {
           // field does not exist
           return null;
         }
         final int startLoc = i;
 
+        // TODO: heavy to do this here; would be better to
+        // do it up front & cache
+        long sum = 0;
+        int upto = i;
+        while(upto < orderedTerms.length && orderedTerms[upto].field() == field) {
+          sum += orderedTerms[upto].getTotalTermFreq();
+          upto++;
+        }
+        final long sumTotalTermFreq = sum;
+
         return new Terms() {
           @Override 
           public TermsEnum iterator() {
             return new InstantiatedTermsEnum(orderedTerms, startLoc, field);
           }
 
+          @Override
+          public long getSumTotalTermFreq() {
+            return sumTotalTermFreq;
+          }
+
           @Override
           public Comparator getComparator() {
             return BytesRef.getUTF8SortedAsUnicodeComparator();
@@ -435,6 +446,11 @@ public class InstantiatedIndexReader extends IndexReader {
       }
     };
   }
+  
+  @Override
+  public ReaderContext getTopReaderContext() {
+    return context;
+  }
 
   @Override
   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
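
With a top-level reader context available, the per-field statistics added above are reachable through the flex Fields/Terms API. A sketch for an atomic reader, with "body" as a placeholder field name:

  Terms terms = reader.fields().terms("body");
  if (terms != null) {
    long sumTotalTermFreq = terms.getSumTotalTermFreq();  // -1 when not tracked
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      int docFreq = termsEnum.docFreq();                  // per-term statistics
    }
  }
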
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
index 093b907193b..6114e9fff9e 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@@ -38,10 +38,12 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CollectionUtil;
@@ -66,7 +68,7 @@ public class InstantiatedIndexWriter implements Closeable {
   private final InstantiatedIndex index;
   private final Analyzer analyzer;
 
-  private Similarity similarity = Similarity.getDefault(); // how to normalize;
+  private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize;
 
   private transient Set fieldNameBuffer;
   /**
@@ -112,14 +114,14 @@ public class InstantiatedIndexWriter implements Closeable {
    *  MAddDocs_20000 -   7 4000 100 false -  -   1 -  -   20000 -  -   535,8 -  -  37,33 - 309 680 640 -  501 968 896
    * 
* - * @see org.apache.lucene.index.IndexWriter#setMergeFactor(int) + * @see org.apache.lucene.index.LogMergePolicy#setMergeFactor(int) */ public void setMergeFactor(int mergeFactor) { this.mergeFactor = mergeFactor; } /** - * @see org.apache.lucene.index.IndexWriter#getMergeFactor() + * @see org.apache.lucene.index.LogMergePolicy#getMergeFactor() */ public int getMergeFactor() { return mergeFactor; @@ -200,9 +202,9 @@ public class InstantiatedIndexWriter implements Closeable { byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field); if (oldNorms != null) { System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length); - Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, oldNorms.length, norms.length, (byte) 0); } else { - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, norms.length, (byte) 0); } normsByFieldNameAndDocumentNumber.put(field, norms); fieldNames.remove(field); @@ -210,7 +212,7 @@ public class InstantiatedIndexWriter implements Closeable { for (String field : fieldNames) { //System.out.println(field); byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()]; - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, norms.length, (byte) 0); normsByFieldNameAndDocumentNumber.put(field, norms); } fieldNames.clear(); @@ -235,10 +237,12 @@ public class InstantiatedIndexWriter implements Closeable { termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size(); if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) { - float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost; - norm *= document.getDocument().getBoost(); - norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); - normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm); + final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName; + final FieldInvertState invertState = new FieldInvertState(); + invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost()); + invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); + final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState); + normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm); } else { System.currentTimeMillis(); } @@ -313,6 +317,7 @@ public class InstantiatedIndexWriter implements Closeable { } associatedDocuments[associatedDocuments.length - 1] = info; term.setAssociatedDocuments(associatedDocuments); + term.addPositionsCount(positions.length); // todo optimize, only if term vector? 
informationByTermOfCurrentDocument.put(term, info); @@ -656,12 +661,12 @@ public class InstantiatedIndexWriter implements Closeable { addDocument(doc, analyzer); } - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } - public void setSimilarity(Similarity similarity) { - this.similarity = similarity; + public void setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider; } public Analyzer getAnalyzer() { diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java index b893e04cd86..cdd2197c89c 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java @@ -45,6 +45,8 @@ public class InstantiatedTerm private Term term; + private long totalTermFreq; + /** * index of term in InstantiatedIndex * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */ @@ -92,6 +94,14 @@ public class InstantiatedTerm this.associatedDocuments = associatedDocuments; } + void addPositionsCount(long count) { + totalTermFreq += count; + } + + public long getTotalTermFreq() { + return totalTermFreq; + } + /** * Finds index to the first beyond the current whose document number is * greater than or equal to target, -1 if there is no such element. diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java index d5301529f37..57f8e3e7fcf 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java @@ -18,10 +18,14 @@ package org.apache.lucene.store.instantiated; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; + +import java.io.IOException; import java.util.Arrays; import java.util.Comparator; @@ -90,10 +94,6 @@ public class InstantiatedTermsEnum extends TermsEnum { } } - @Override - public void cacheCurrentTerm() { - } - @Override public BytesRef term() { return br; @@ -109,6 +109,12 @@ public class InstantiatedTermsEnum extends TermsEnum { return terms[upto].getAssociatedDocuments().length; } + @Override + public long totalTermFreq() { + final long v = terms[upto].getTotalTermFreq(); + return v == 0 ? 
-1 : v; + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { if (reuse == null || !(reuse instanceof InstantiatedDocsEnum)) { @@ -129,5 +135,18 @@ public class InstantiatedTermsEnum extends TermsEnum { public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public TermState termState() throws IOException { + final OrdTermState state = new OrdTermState(); + state.ord = upto - start; + return state; + } + + @Override + public void seek(BytesRef term, TermState state) throws IOException { + assert state != null && state instanceof OrdTermState; + seek(((OrdTermState)state).ord); // just use the ord for simplicity + } } diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java index d6a7cf54a6b..f513a0bb423 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java @@ -37,7 +37,7 @@ public class TestEmptyIndex extends LuceneTestCase { InstantiatedIndex ii = new InstantiatedIndex(); IndexReader r = new InstantiatedIndexReader(ii); - IndexSearcher s = new IndexSearcher(r); + IndexSearcher s = newSearcher(r); TopDocs td = s.search(new TermQuery(new Term("foo", "bar")), 1); @@ -71,12 +71,6 @@ public class TestEmptyIndex extends LuceneTestCase { byte[] norms = MultiNorms.norms(r, "foo"); if (norms != null) { assertEquals(0, norms.length); - norms = new byte[10]; - Arrays.fill(norms, (byte)10); - r.norms("foo", norms, 10); - for (byte b : norms) { - assertEquals((byte)10, b); - } } } diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java index a698abd52dd..7a5398c4ed0 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java @@ -65,7 +65,8 @@ public class TestIndicesEquals extends LuceneTestCase { // create dir data IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + for (int i = 0; i < 20; i++) { Document document = new Document(); assembleDocument(document, i); @@ -90,7 +91,11 @@ public class TestIndicesEquals extends LuceneTestCase { // create dir data IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + indexWriter.setInfoStream(VERBOSE ? 
System.out : null); + if (VERBOSE) { + System.out.println("TEST: make test index"); + } for (int i = 0; i < 500; i++) { Document document = new Document(); assembleDocument(document, i); @@ -320,6 +325,9 @@ public class TestIndicesEquals extends LuceneTestCase { protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception { + if (VERBOSE) { + System.out.println("TEST: testEquals"); + } testTermDocsSomeMore(aprioriIndex, testIndex); IndexReader aprioriReader = IndexReader.open(aprioriIndex, false); @@ -351,35 +359,6 @@ public class TestIndicesEquals extends LuceneTestCase { for (int i = 0; i < aprioriNorms.length; i++) { assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]); } - - // test norms as used by multireader - - aprioriNorms = new byte[aprioriReader.maxDoc()]; - MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 0); - - testNorms = new byte[testReader.maxDoc()]; - MultiNorms.norms(testReader, (String) field, testNorms, 0); - - assertEquals(aprioriNorms.length, testNorms.length); - - for (int i = 0; i < aprioriNorms.length; i++) { - assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]); - } - - - // test norms as used by multireader - - aprioriNorms = new byte[aprioriReader.maxDoc() + 10]; - MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 10); - - testNorms = new byte[testReader.maxDoc() + 10]; - MultiNorms.norms(testReader, (String) field, testNorms, 10); - - assertEquals(aprioriNorms.length, testNorms.length); - - for (int i = 0; i < aprioriNorms.length; i++) { - assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]); - } } } @@ -401,6 +380,9 @@ public class TestIndicesEquals extends LuceneTestCase { String aprioriField; while((aprioriField = aprioriFieldsEnum.next()) != null) { String testField = testFieldsEnum.next(); + if (VERBOSE) { + System.out.println("TEST: verify field=" + testField); + } assertEquals(aprioriField, testField); TermsEnum aprioriTermEnum = aprioriFieldsEnum.terms(); @@ -409,8 +391,15 @@ public class TestIndicesEquals extends LuceneTestCase { BytesRef aprioriText; while((aprioriText = aprioriTermEnum.next()) != null) { assertEquals(aprioriText, testTermEnum.next()); + if (VERBOSE) { + System.out.println("TEST: verify term=" + aprioriText.utf8ToString()); + } assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq()); + final long totalTermFreq = aprioriTermEnum.totalTermFreq(); + if (totalTermFreq != -1) { + assertEquals(totalTermFreq, testTermEnum.totalTermFreq()); + } // compare termDocs seeking @@ -434,6 +423,10 @@ public class TestIndicesEquals extends LuceneTestCase { assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc()); break; } + if (VERBOSE) { + System.out.println("TEST: verify doc=" + aprioriTermDocs.docID()); + } + assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); @@ -445,12 +438,19 @@ public class TestIndicesEquals extends LuceneTestCase { DocsAndPositionsEnum aprioriTermPositions = aprioriTermEnum.docsAndPositions(MultiFields.getDeletedDocs(aprioriReader), null); DocsAndPositionsEnum testTermPositions = testTermEnum.docsAndPositions(MultiFields.getDeletedDocs(testReader), null); + if (VERBOSE) { + System.out.println("TEST: enum1=" + aprioriTermPositions + " enum2=" + testTermPositions); + } if (aprioriTermPositions != null) { for (int 
docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) { boolean hasNext = aprioriTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS; if (hasNext) { assertTrue(testTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS); + + if (VERBOSE) { + System.out.println("TEST: verify doc=" + aprioriTermPositions.docID()); + } assertEquals(aprioriTermPositions.freq(), testTermPositions.freq()); @@ -458,6 +458,10 @@ public class TestIndicesEquals extends LuceneTestCase { int aprioriPos = aprioriTermPositions.nextPosition(); int testPos = testTermPositions.nextPosition(); + if (VERBOSE) { + System.out.println("TEST: verify pos=" + aprioriPos); + } + assertEquals(aprioriPos, testPos); assertEquals(aprioriTermPositions.hasPayload(), testTermPositions.hasPayload()); diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java index 4e7c59f9e6a..413d7f56fae 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java @@ -20,8 +20,8 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Scorer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.LuceneTestCase; /** @@ -36,7 +36,7 @@ public class TestRealTime extends LuceneTestCase { InstantiatedIndex index = new InstantiatedIndex(); InstantiatedIndexReader reader = new InstantiatedIndexReader(index); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); InstantiatedIndexWriter writer = new InstantiatedIndexWriter(index); Document doc; @@ -67,7 +67,7 @@ public class TestRealTime extends LuceneTestCase { @Override public void setScorer(Scorer scorer) {} @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override diff --git a/lucene/contrib/lucli/pom.xml.template b/lucene/contrib/lucli/pom.xml.template deleted file mode 100644 index 8354ded1e33..00000000000 --- a/lucene/contrib/lucli/pom.xml.template +++ /dev/null @@ -1,43 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-lucli - jar - @version@ - lucene-contrib-lucli - Lucene Command Line Interface - - - jline - jline - 0.9.91 - - - diff --git a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java index 9aca8ee3f27..266297b1f3b 100644 --- a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java +++ b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java @@ -41,6 +41,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Fields; @@ -232,7 +233,7 @@ class LuceneMethods { } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public 
void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; diff --git a/lucene/contrib/memory/pom.xml.template b/lucene/contrib/memory/pom.xml.template deleted file mode 100644 index ed5151464d6..00000000000 --- a/lucene/contrib/memory/pom.xml.template +++ /dev/null @@ -1,38 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-memory - Lucene Memory - @version@ - - High-performance single-document index to compare against Query - - jar - diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 6b4dc526a06..e5a96711a63 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; @@ -38,6 +39,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.FieldsEnum; @@ -48,12 +52,14 @@ import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.RAMDirectory; // for javadocs import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -443,7 +449,7 @@ public class MemoryIndex implements Serializable { } @Override - public void setNextReader(IndexReader reader, int docBase) { } + public void setNextReader(AtomicReaderContext context) { } }); float score = scores[0]; return score; @@ -607,6 +613,8 @@ public class MemoryIndex implements Serializable { /** Term for this field's fieldName, lazily computed on demand */ public transient Term template; + private final long sumTotalTermFreq; + private static final long serialVersionUID = 2882195016849084649L; public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost) { @@ -614,6 +622,15 @@ public class MemoryIndex implements Serializable { this.numTokens = numTokens; this.numOverlapTokens = numOverlapTokens; this.boost = boost; + long sum = 0; + for(Map.Entry ent : terms.entrySet()) { + sum += ent.getValue().size(); + } + sumTotalTermFreq = sum; + } + + public long getSumTotalTermFreq() { + return sumTotalTermFreq; } /** @@ -739,9 +756,11 @@ public class MemoryIndex implements Serializable { private final class MemoryIndexReader extends IndexReader { private IndexSearcher searcher; // needed to find searcher.getSimilarity() + private final ReaderContext 
readerInfos = new AtomicReaderContext(this); private MemoryIndexReader() { super(); // avoid as much superclass baggage as possible + readerFinishedListeners = Collections.synchronizedSet(new HashSet()); } private Info getInfo(String fieldName) { @@ -765,6 +784,11 @@ public class MemoryIndex implements Serializable { if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq); return freq; } + + @Override + public ReaderContext getTopReaderContext() { + return readerInfos; + } @Override public Fields fields() { @@ -823,6 +847,11 @@ public class MemoryIndex implements Serializable { public long getUniqueTermCount() { return info.sortedTerms.length; } + + @Override + public long getSumTotalTermFreq() { + return info.getSumTotalTermFreq(); + } }; } } @@ -888,10 +917,6 @@ public class MemoryIndex implements Serializable { return br; } - @Override - public void cacheCurrentTerm() { - } - @Override public long ord() { return termUpto; @@ -902,6 +927,11 @@ public class MemoryIndex implements Serializable { return 1; } + @Override + public long totalTermFreq() { + return info.sortedTerms[termUpto].getValue().size(); + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { @@ -922,8 +952,21 @@ public class MemoryIndex implements Serializable { public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } - } + @Override + public void seek(BytesRef term, TermState state) throws IOException { + assert state != null; + this.seek(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState ts = new OrdTermState(); + ts.ord = termUpto; + return ts; + } + } + private class MemoryDocsEnum extends DocsEnum { private ArrayIntList positions; private boolean hasNext; @@ -1142,9 +1185,9 @@ public class MemoryIndex implements Serializable { }; } - private Similarity getSimilarity() { - if (searcher != null) return searcher.getSimilarity(); - return Similarity.getDefault(); + private SimilarityProvider getSimilarityProvider() { + if (searcher != null) return searcher.getSimilarityProvider(); + return IndexSearcher.getDefaultSimilarityProvider(); } private void setSearcher(IndexSearcher searcher) { @@ -1154,20 +1197,21 @@ public class MemoryIndex implements Serializable { /** performance hack: cache norms to avoid repeated expensive calculations */ private byte[] cachedNorms; private String cachedFieldName; - private Similarity cachedSimilarity; + private SimilarityProvider cachedSimilarity; @Override public byte[] norms(String fieldName) { byte[] norms = cachedNorms; - Similarity sim = getSimilarity(); + SimilarityProvider sim = getSimilarityProvider(); if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached? Info info = getInfo(fieldName); + Similarity fieldSim = sim.get(fieldName); int numTokens = info != null ? info.numTokens : 0; int numOverlapTokens = info != null ? info.numOverlapTokens : 0; float boost = info != null ? 
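MemoryIndex's TermsEnum gains `termState()`, `seek(BytesRef, TermState)` and `totalTermFreq()` in this hunk. The sketch below shows how a caller can use the round-trip; the field name and term are placeholders, and the helper class is not part of the patch.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class TermStateRoundTrip {
  // Seek to a term, remember the enum position, and later jump back without re-searching.
  public static long revisit(IndexReader reader, String field, BytesRef term) throws Exception {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return 0;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seek(term) != TermsEnum.SeekStatus.FOUND) {
      return 0;
    }
    TermState state = termsEnum.termState();   // MemoryIndex hands back an OrdTermState
    long ttf = termsEnum.totalTermFreq();      // new per-term statistic, -1 if unsupported
    // ... the enum could be repositioned by other work here ...
    termsEnum.seek(term, state);               // cheap restore, no second lookup
    return ttf;
  }
}
```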
info.getBoost() : 1.0f; FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); - float n = sim.computeNorm(fieldName, invertState); - byte norm = sim.encodeNormValue(n); + float n = fieldSim.computeNorm(fieldName, invertState); + byte norm = fieldSim.encodeNormValue(n); norms = new byte[] {norm}; // cache it for future reuse @@ -1179,13 +1223,6 @@ public class MemoryIndex implements Serializable { return norms; } - @Override - public void norms(String fieldName, byte[] bytes, int offset) { - if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName); - byte[] norms = norms(fieldName); - System.arraycopy(norms, 0, bytes, offset, norms.length); - } - @Override protected void doSetNorm(int doc, String fieldName, byte value) { throw new UnsupportedOperationException(); diff --git a/lucene/contrib/misc/pom.xml.template b/lucene/contrib/misc/pom.xml.template deleted file mode 100644 index 0c47d3928b9..00000000000 --- a/lucene/contrib/misc/pom.xml.template +++ /dev/null @@ -1,36 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-misc - Lucene Miscellaneous - @version@ - Miscellaneous Lucene extensions - jar - diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index 9cfd56803ca..acebe221a9a 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.StringHelper; @@ -33,7 +34,7 @@ import org.apache.lucene.util.ReaderUtil; /** * Given a directory and a list of fields, updates the fieldNorms in place for every document. * - * If Similarity class is specified, uses its lengthNorm method to set norms. + * If Similarity class is specified, uses its computeNorm method to set norms. * If -n command line argument is used, removed field norms, as if * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used. 
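The norms path now goes through a SimilarityProvider: the provider is asked for the field's Similarity, which computes and encodes the norm from a FieldInvertState. A hedged sketch of that pattern, as used by the patched MemoryIndexReader.norms(); the token counts and boost are illustrative.

```java
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;

public final class NormSketch {
  // Compute the single norm byte for a field, the way the patched norms() does.
  public static byte normFor(String field, int numTokens, int numOverlapTokens, float boost) {
    SimilarityProvider provider = IndexSearcher.getDefaultSimilarityProvider();
    Similarity fieldSim = provider.get(field);          // per-field Similarity lookup
    FieldInvertState state = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
    float norm = fieldSim.computeNorm(field, state);
    return fieldSim.encodeNormValue(norm);
  }
}
```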
* @@ -57,13 +58,13 @@ public class FieldNormModifier { System.exit(1); } - Similarity s = null; + SimilarityProvider s = null; if (args[1].equals("-d")) args[1] = DefaultSimilarity.class.getName(); try { - s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance(); + s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance(); } catch (Exception e) { System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]); e.printStackTrace(System.err); @@ -84,7 +85,7 @@ public class FieldNormModifier { private Directory dir; - private Similarity sim; + private SimilarityProvider sim; /** * Constructor for code that wishes to use this class programmatically @@ -93,7 +94,7 @@ public class FieldNormModifier { * @param d the Directory to modify * @param s the Similarity to use (can be null) */ - public FieldNormModifier(Directory d, Similarity s) { + public FieldNormModifier(Directory d, SimilarityProvider s) { dir = d; sim = s; } @@ -111,7 +112,7 @@ public class FieldNormModifier { */ public void reSetNorms(String field) throws IOException { String fieldName = StringHelper.intern(field); - + Similarity fieldSim = sim.get(field); IndexReader reader = null; try { reader = IndexReader.open(dir, false); @@ -119,6 +120,7 @@ public class FieldNormModifier { final List subReaders = new ArrayList(); ReaderUtil.gatherSubReaders(subReaders, reader); + final FieldInvertState invertState = new FieldInvertState(); for(IndexReader subReader : subReaders) { final Bits delDocs = subReader.getDeletedDocs(); @@ -143,9 +145,11 @@ public class FieldNormModifier { } } + invertState.setBoost(1.0f); for (int d = 0; d < termCounts.length; d++) { if (delDocs == null || !delDocs.get(d)) { - subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d]))); + invertState.setLength(termCounts[d]); + subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState))); } } } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java index fc34a077652..cb0dde07e76 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java @@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -131,7 +131,7 @@ public class AppendingCodec extends Codec { public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) throws IOException { StandardPostingsReader.files(dir, segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java 
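FieldNormModifier now accepts any SimilarityProvider; DefaultSimilarity still qualifies, as the `-d` command-line default in this hunk shows. A hypothetical programmatic call after the change; the index path and field name are made up.

```java
import java.io.File;

import org.apache.lucene.index.FieldNormModifier;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public final class ReNormExample {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));   // hypothetical index location
    // DefaultSimilarity acts as its own SimilarityProvider, so it can be passed directly.
    FieldNormModifier fnm = new FieldNormModifier(dir, new DefaultSimilarity());
    fnm.reSetNorms("body");                                         // field name is illustrative
    dir.close();
  }
}
```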
b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java index 65a3325ca57..8a1d9b80e78 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java @@ -22,15 +22,15 @@ import java.util.Comparator; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; -public class AppendingTermsDictReader extends PrefixCodedTermsReader { +public class AppendingTermsDictReader extends BlockTermsReader { public AppendingTermsDictReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, @@ -43,7 +43,7 @@ public class AppendingTermsDictReader extends PrefixCodedTermsReader { @Override protected void readHeader(IndexInput in) throws IOException { CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME, - PrefixCodedTermsWriter.VERSION_START, PrefixCodedTermsWriter.VERSION_CURRENT); + BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT); } @Override diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java index 5c4e38bcc43..46362386afe 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java @@ -22,13 +22,13 @@ import java.util.Comparator; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.PostingsWriterBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; -public class AppendingTermsDictWriter extends PrefixCodedTermsWriter { +public class AppendingTermsDictWriter extends BlockTermsWriter { final static String CODEC_NAME = "APPENDING_TERMS_DICT"; public AppendingTermsDictWriter(TermsIndexWriterBase indexWriter, diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java index c0ac082f293..77d29820660 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java @@ -176,15 +176,34 @@ public class HighFreqTerms { return ts; } - public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termtext) throws Exception { - BytesRef br = termtext; + public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termText) throws Exception { + long totalTF = 0; - Bits skipDocs = MultiFields.getDeletedDocs(reader); - DocsEnum de = 
MultiFields.getTermDocsEnum(reader, skipDocs, field, br); - // if term is not in index return totalTF of 0 - if (de == null) { + + Terms terms = MultiFields.getTerms(reader, field); + if (terms == null) { return 0; } + + TermsEnum termsEnum = terms.iterator(); + if (termsEnum.seek(termText) != TermsEnum.SeekStatus.FOUND) { + return 0; + } + + Bits skipDocs = MultiFields.getDeletedDocs(reader); + if (skipDocs == null) { + // TODO: we could do this up front, during the scan + // (next()), instead of after-the-fact here w/ seek, + // if the codec supports it and there are no del + // docs... + final long totTF = termsEnum.totalTermFreq(); + if (totTF != -1) { + return totTF; + } + } + + DocsEnum de = termsEnum.docs(skipDocs, null); + // use DocsEnum.read() and BulkResult api final DocsEnum.BulkReadResult bulkresult = de.getBulkResult(); int count; diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java index 00a60eb1a82..cda2f0790bf 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java @@ -146,7 +146,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity { else numTokens = state.getLength(); - return state.getBoost() * lengthNorm(fieldName, numTokens); + return state.getBoost() * computeLengthNorm(fieldName, numTokens); } /** @@ -167,8 +167,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity { * * @see #setLengthNormFactors */ - @Override - public float lengthNorm(String fieldName, int numTerms) { + public float computeLengthNorm(String fieldName, int numTerms) { int l = ln_min; int h = ln_max; float s = ln_steep; diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java index b28fbdde436..08c4a808367 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java @@ -41,4 +41,9 @@ public final class TermStats { String getTermText() { return termtext.utf8ToString(); } + + @Override + public String toString() { + return("TermStats: term=" + termtext.utf8ToString() + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq); + } } \ No newline at end of file diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp b/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp index 7ccf7e7b445..fa05142f877 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
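The rewritten getTotalTermFreq() prefers the codec-supplied totalTermFreq() and only falls back to walking postings. Below is a simplified sketch of that decision, without the bulk-read optimization the real method keeps; the helper class is illustrative.

```java
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

public final class TotalTermFreqSketch {
  public static long totalTermFreq(IndexReader reader, String field, BytesRef term) throws Exception {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return 0;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seek(term) != TermsEnum.SeekStatus.FOUND) {
      return 0;
    }
    Bits skipDocs = MultiFields.getDeletedDocs(reader);
    if (skipDocs == null) {
      long totTF = termsEnum.totalTermFreq();
      if (totTF != -1) {
        return totTF;          // fast path: codec tracked the stat and there are no deletions
      }
    }
    // fallback: sum per-document frequencies, skipping deleted docs
    long total = 0;
    DocsEnum de = termsEnum.docs(skipDocs, null);
    while (de.nextDoc() != DocsEnum.NO_MORE_DOCS) {
      total += de.freq();
    }
    return total;
  }
}
```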
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + #include #include // posix_fadvise, constants for open #include // strerror diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java index 870ebfade1e..f1c3f74a117 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java @@ -64,6 +64,7 @@ public class WindowsDirectory extends FSDirectory { super(path, null); } + @Override public IndexInput openInput(String name, int bufferSize) throws IOException { ensureOpen(); return new WindowsIndexInput(new File(getDirectory(), name), Math.max(bufferSize, DEFAULT_BUFFERSIZE)); @@ -82,14 +83,17 @@ public class WindowsDirectory extends FSDirectory { isOpen = true; } + @Override protected void readInternal(byte[] b, int offset, int length) throws IOException { if (WindowsDirectory.read(fd, b, offset, length, getFilePointer()) != length) throw new IOException("Read past EOF"); } + @Override protected void seekInternal(long pos) throws IOException { } + @Override public synchronized void close() throws IOException { // NOTE: we synchronize and track "isOpen" because Lucene sometimes closes IIs twice! if (!isClone && isOpen) { @@ -98,6 +102,7 @@ public class WindowsDirectory extends FSDirectory { } } + @Override public long length() { return length; } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index 078ba2c3f26..48bb42dfcf5 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -23,11 +23,12 @@ import java.util.Arrays; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -41,10 +42,10 @@ public class TestFieldNormModifier extends LuceneTestCase { public Directory store; /** inverts the normal notion of lengthNorm */ - public static Similarity s = new DefaultSimilarity() { + public static SimilarityProvider s = new DefaultSimilarity() { @Override - public float lengthNorm(String fieldName, int numTokens) { - return numTokens; + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost() * (discountOverlaps ? 
state.getLength() - state.getNumOverlap() : state.getLength()); } }; @@ -53,7 +54,7 @@ public class TestFieldNormModifier extends LuceneTestCase { super.setUp(); store = newDirectory(); IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); @@ -122,8 +123,8 @@ public class TestFieldNormModifier extends LuceneTestCase { scores[doc + docBase] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { @@ -157,8 +158,8 @@ public class TestFieldNormModifier extends LuceneTestCase { scores[doc + docBase] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { @@ -209,8 +210,8 @@ public class TestFieldNormModifier extends LuceneTestCase { scores[doc + docBase] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java index f861063942d..158b24ff58b 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java @@ -32,7 +32,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Document doc; for (int i = 0; i < NUM_DOCS; i++) { doc = new Document(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java index 36450e23ae3..170ef247842 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java @@ -21,13 +21,14 @@ package org.apache.lucene.misc; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.index.FieldInvertState; /** * Test of the SweetSpotSimilarity */ public class SweetSpotSimilarityTest extends LuceneTestCase { - public void testSweetSpotLengthNorm() { + public void testSweetSpotComputeNorm() { SweetSpotSimilarity ss = new SweetSpotSimilarity(); ss.setLengthNormFactors(1,1,0.5f); @@ -37,10 +38,13 @@ public class SweetSpotSimilarityTest extends LuceneTestCase { // base case, should degrade - + final FieldInvertState invertState = new 
FieldInvertState(); + invertState.setBoost(1.0f); for (int i = 1; i < 1000; i++) { + invertState.setLength(i); assertEquals("base case: i="+i, - d.lengthNorm("foo",i), s.lengthNorm("foo",i), + d.computeNorm("foo", invertState), + s.computeNorm("foo", invertState), 0.0f); } @@ -49,14 +53,21 @@ public class SweetSpotSimilarityTest extends LuceneTestCase { ss.setLengthNormFactors(3,10,0.5f); for (int i = 3; i <=10; i++) { + invertState.setLength(i); assertEquals("3,10: spot i="+i, - 1.0f, s.lengthNorm("foo",i), + 1.0f, + s.computeNorm("foo", invertState), 0.0f); } for (int i = 10; i < 1000; i++) { + invertState.setLength(i-9); + final float normD = d.computeNorm("foo", invertState); + invertState.setLength(i); + final float normS = s.computeNorm("foo", invertState); assertEquals("3,10: 100){ - assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq); + if (i > 0) { + assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq); } } } @@ -134,11 +137,12 @@ public class TestHighFreqTerms extends LuceneTestCase { TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field); TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms); - for (int i = 0; i < termsWithTF.length; i++) { - // check that they are sorted by descending termfreq order - if (i >0){ - assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq > termsWithTF[i].totalTermFreq); - } + for (int i = 0; i < termsWithTF.length; i++) { + // check that they are sorted by descending termfreq + // order + if (i > 0) { + assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq >= termsWithTF[i].totalTermFreq); + } } } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index af02e36c72d..7dfa6a311d5 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -22,8 +22,10 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldNormModifier; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiNorms; import org.apache.lucene.index.Term; @@ -31,7 +33,7 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -45,19 +47,19 @@ public class TestLengthNormModifier extends LuceneTestCase { public Directory store; /** inverts the normal notion of lengthNorm */ - public static Similarity s = new DefaultSimilarity() { - @Override - public float lengthNorm(String fieldName, int numTokens) { - return numTokens; - } - }; + public static SimilarityProvider s = new DefaultSimilarity() { + 
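With lengthNorm(String, int) removed from Similarity, SweetSpotSimilarity exposes its curve as computeLengthNorm() and routes computeNorm() through a FieldInvertState, which is what these test hunks exercise. A small hedged sketch of the plateau behaviour, with the 3..10 sweet spot the test also uses:

```java
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.misc.SweetSpotSimilarity;

public final class SweetSpotSketch {
  public static void main(String[] args) {
    SweetSpotSimilarity ss = new SweetSpotSimilarity();
    ss.setLengthNormFactors(3, 10, 0.5f);        // plateau between 3 and 10 terms

    FieldInvertState state = new FieldInvertState();
    state.setBoost(1.0f);
    state.setLength(7);                          // inside the sweet spot

    // both paths should agree; inside the plateau the norm is 1.0
    System.out.println(ss.computeNorm("foo", state));
    System.out.println(ss.computeLengthNorm("foo", 7));

    state.setLength(50);                         // outside the plateau the norm degrades
    System.out.println(ss.computeNorm("foo", state));
  }
}
```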
@Override + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength()); + } + }; @Override public void setUp() throws Exception { super.setUp(); store = newDirectory(); IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); @@ -138,8 +140,8 @@ public class TestLengthNormModifier extends LuceneTestCase { scores[doc + docBase] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { @@ -161,12 +163,12 @@ public class TestLengthNormModifier extends LuceneTestCase { } // override the norms to be inverted - Similarity s = new DefaultSimilarity() { - @Override - public float lengthNorm(String fieldName, int numTokens) { - return numTokens; - } - }; + SimilarityProvider s = new DefaultSimilarity() { + @Override + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength()); + } + }; FieldNormModifier fnm = new FieldNormModifier(store, s); fnm.reSetNorms("field"); @@ -180,8 +182,8 @@ public class TestLengthNormModifier extends LuceneTestCase { scores[doc + docBase] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { diff --git a/lucene/contrib/queries/pom.xml.template b/lucene/contrib/queries/pom.xml.template deleted file mode 100644 index 0cd10860125..00000000000 --- a/lucene/contrib/queries/pom.xml.template +++ /dev/null @@ -1,45 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-queries - Lucene Queries - @version@ - - Queries - various query object exotica not in core - - jar - - - jakarta-regexp - jakarta-regexp - ${jakarta-regexp-version} - - - diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java index 2e6868e5202..99a84b20bb7 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSetDISI; @@ -41,10 +42,10 @@ public class BooleanFilter extends Filter ArrayList notFilters = null; ArrayList mustFilters = null; - private DocIdSetIterator getDISI(ArrayList filters, int index, IndexReader reader) + private DocIdSetIterator getDISI(ArrayList filters, int index, AtomicReaderContext context) throws IOException { - return filters.get(index).getDocIdSet(reader).iterator(); + return filters.get(index).getDocIdSet(context).iterator(); 
} /** @@ -52,21 +53,21 @@ public class BooleanFilter extends Filter * of the filters that have been added. */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { OpenBitSetDISI res = null; - + final IndexReader reader = context.reader; if (shouldFilters != null) { for (int i = 0; i < shouldFilters.size(); i++) { if (res == null) { - res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc()); + res = new OpenBitSetDISI(getDISI(shouldFilters, i, context), reader.maxDoc()); } else { - DocIdSet dis = shouldFilters.get(i).getDocIdSet(reader); + DocIdSet dis = shouldFilters.get(i).getDocIdSet(context); if(dis instanceof OpenBitSet) { // optimized case for OpenBitSets res.or((OpenBitSet) dis); } else { - res.inPlaceOr(getDISI(shouldFilters, i, reader)); + res.inPlaceOr(getDISI(shouldFilters, i, context)); } } } @@ -75,15 +76,15 @@ public class BooleanFilter extends Filter if (notFilters!=null) { for (int i = 0; i < notFilters.size(); i++) { if (res == null) { - res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc()); + res = new OpenBitSetDISI(getDISI(notFilters, i, context), reader.maxDoc()); res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs } else { - DocIdSet dis = notFilters.get(i).getDocIdSet(reader); + DocIdSet dis = notFilters.get(i).getDocIdSet(context); if(dis instanceof OpenBitSet) { // optimized case for OpenBitSets res.andNot((OpenBitSet) dis); } else { - res.inPlaceNot(getDISI(notFilters, i, reader)); + res.inPlaceNot(getDISI(notFilters, i, context)); } } } @@ -92,14 +93,14 @@ public class BooleanFilter extends Filter if (mustFilters!=null) { for (int i = 0; i < mustFilters.size(); i++) { if (res == null) { - res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc()); + res = new OpenBitSetDISI(getDISI(mustFilters, i, context), reader.maxDoc()); } else { - DocIdSet dis = mustFilters.get(i).getDocIdSet(reader); + DocIdSet dis = mustFilters.get(i).getDocIdSet(context); if(dis instanceof OpenBitSet) { // optimized case for OpenBitSets res.and((OpenBitSet) dis); } else { - res.inPlaceAnd(getDISI(mustFilters, i, reader)); + res.inPlaceAnd(getDISI(mustFilters, i, context)); } } } diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java index 5a5fa0b388d..0b81cd6181a 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java @@ -21,10 +21,9 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Query; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; + /** * The BoostingQuery class can be used to effectively demote results that match a given query. 
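Filters in this patch move from getDocIdSet(IndexReader) to getDocIdSet(AtomicReaderContext), pulling the segment reader off the context. A minimal custom filter under the new signature; the even-docID rule is purely illustrative.

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;

// Illustrative per-segment filter: accepts every even (segment-local) document id.
public class EvenDocsFilter extends Filter {
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
    final IndexReader reader = context.reader;          // segment reader, not the top-level one
    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    for (int doc = 0; doc < reader.maxDoc(); doc += 2) {
      bits.set(doc);
    }
    return bits;                                        // OpenBitSet is itself a DocIdSet
  }
}
```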
* Unlike the "NOT" clause, this still selects documents that contain undesirable terms, @@ -56,10 +55,9 @@ public class BoostingQuery extends Query { @Override public Query rewrite(IndexReader reader) throws IOException { BooleanQuery result = new BooleanQuery() { - @Override - public Similarity getSimilarity(IndexSearcher searcher) { - return new DefaultSimilarity() { + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new BooleanWeight(searcher, false) { @Override public float coord(int overlap, int max) { diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java index e95b50660d8..4041792f5af 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; @@ -96,21 +97,21 @@ public class ChainedFilter extends Filter * {@link Filter#getDocIdSet}. */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { int[] index = new int[1]; // use array as reference to modifiable int; index[0] = 0; // an object attribute would not be thread safe. if (logic != -1) - return getDocIdSet(reader, logic, index); + return getDocIdSet(context, logic, index); else if (logicArray != null) - return getDocIdSet(reader, logicArray, index); + return getDocIdSet(context, logicArray, index); else - return getDocIdSet(reader, DEFAULT, index); + return getDocIdSet(context, DEFAULT, index); } - private DocIdSetIterator getDISI(Filter filter, IndexReader reader) + private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context) throws IOException { - DocIdSet docIdSet = filter.getDocIdSet(reader); + DocIdSet docIdSet = filter.getDocIdSet(context); if (docIdSet == null) { return DocIdSet.EMPTY_DOCIDSET.iterator(); } else { @@ -123,9 +124,10 @@ public class ChainedFilter extends Filter } } - private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index) + private OpenBitSetDISI initialResult(AtomicReaderContext context, int logic, int[] index) throws IOException { + IndexReader reader = context.reader; OpenBitSetDISI result; /** * First AND operation takes place against a completely false @@ -133,12 +135,12 @@ public class ChainedFilter extends Filter */ if (logic == AND) { - result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc()); + result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc()); ++index[0]; } else if (logic == ANDNOT) { - result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc()); + result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc()); result.flip(0,reader.maxDoc()); // NOTE: may set bits for deleted docs. 
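BoostingQuery can no longer hang its coord() tweak off getSimilarity(IndexSearcher); the patch moves it into createWeight(), overriding coord() on the BooleanWeight itself. A sketch of that pattern follows; the class name and demotion factor are stand-ins for BoostingQuery's own fields, not code from the patch.

```java
import java.io.IOException;

import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;

// Sketch: demote documents that match every clause by overriding coord() on the weight.
public class DemotingBooleanQuery extends BooleanQuery {
  private final float demotionBoost;                    // hypothetical tuning knob

  public DemotingBooleanQuery(float demotionBoost) {
    this.demotionBoost = demotionBoost;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    return new BooleanWeight(searcher, false) {         // false: coord stays enabled
      @Override
      public float coord(int overlap, int max) {
        // full overlap means the undesirable clause matched too, so scale the score down
        return (overlap == max) ? demotionBoost : 1.0f;
      }
    };
  }
}
```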
++index[0]; } @@ -155,13 +157,13 @@ public class ChainedFilter extends Filter * @param logic Logical operation * @return DocIdSet */ - private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index) + private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index) throws IOException { - OpenBitSetDISI result = initialResult(reader, logic, index); + OpenBitSetDISI result = initialResult(context, logic, index); for (; index[0] < chain.length; index[0]++) { - doChain(result, logic, chain[index[0]].getDocIdSet(reader)); + doChain(result, logic, chain[index[0]].getDocIdSet(context)); } return result; } @@ -172,16 +174,16 @@ public class ChainedFilter extends Filter * @param logic Logical operation * @return DocIdSet */ - private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index) + private DocIdSet getDocIdSet(AtomicReaderContext info, int[] logic, int[] index) throws IOException { if (logic.length != chain.length) throw new IllegalArgumentException("Invalid number of elements in logic array"); - OpenBitSetDISI result = initialResult(reader, logic[0], index); + OpenBitSetDISI result = initialResult(info, logic[0], index); for (; index[0] < chain.length; index[0]++) { - doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader)); + doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(info)); } return result; } diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java index 3a249344986..3b0c8de8296 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java @@ -19,6 +19,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.TermsEnum; @@ -27,7 +28,8 @@ import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.Bits; public class DuplicateFilter extends Filter -{ +{ // TODO: make duplicate filter aware of ReaderContext such that we can + // filter duplicates across segments String fieldName; @@ -70,15 +72,15 @@ public class DuplicateFilter extends Filter } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { if(processingMode==PM_FAST_INVALIDATION) { - return fastBits(reader); + return fastBits(context.reader); } else { - return correctBits(reader); + return correctBits(context.reader); } } @@ -96,7 +98,7 @@ public class DuplicateFilter extends Filter } else { docs = termsEnum.docs(delDocs, docs); int doc = docs.nextDoc(); - if (doc != docs.NO_MORE_DOCS) { + if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KM_USE_FIRST_OCCURRENCE) { bits.set(doc); } else { @@ -104,7 +106,7 @@ public class DuplicateFilter extends Filter while (true) { lastDoc = doc; doc = docs.nextDoc(); - if (doc == docs.NO_MORE_DOCS) { + if (doc == DocsEnum.NO_MORE_DOCS) { break; } } @@ -136,7 +138,7 @@ public class DuplicateFilter extends Filter // unset potential duplicates docs = termsEnum.docs(delDocs, docs); int doc = docs.nextDoc(); - if (doc != docs.NO_MORE_DOCS) { + if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KM_USE_FIRST_OCCURRENCE) { doc = docs.nextDoc(); } @@ -147,7 +149,7 @@ 
public class DuplicateFilter extends Filter lastDoc = doc; bits.clear(lastDoc); doc = docs.nextDoc(); - if (doc == docs.NO_MORE_DOCS) { + if (doc == DocsEnum.NO_MORE_DOCS) { break; } } diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java index e296fcaa5ff..8539f855f9f 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; @@ -108,8 +109,8 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod * results. */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, query.field); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, query.field); final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd()); TermsEnum termsEnum = query.getTermsEnum(new Terms() { @@ -122,7 +123,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod public TermsEnum iterator() throws IOException { return fcsi.getTermsEnum(); } - + + @Override + public long getSumTotalTermFreq() { + return -1; + } }); assert termsEnum != null; @@ -142,7 +147,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod return DocIdSet.EMPTY_DOCIDSET; } - return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) { + return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) { @Override boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { return termSet.fastGet(fcsi.getOrd(doc)); diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java index 8c15b89c4a4..13ec241f64c 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java @@ -292,7 +292,7 @@ public class FuzzyLikeThisQuery extends Query { //optimize where only one selected variant ScoreTerm st= variants.get(0); - TermQuery tq = new FuzzyTermQuery(st.term,ignoreTF); + Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); tq.setBoost(st.score); // set the boost to a mix of IDF and score bq.add(tq, BooleanClause.Occur.SHOULD); } @@ -303,7 +303,8 @@ public class FuzzyLikeThisQuery extends Query .hasNext();) { ScoreTerm st = iterator2.next(); - TermQuery tq = new FuzzyTermQuery(st.term,ignoreTF); // found a match + // found a match + Query tq = ignoreTF ? 
new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); tq.setBoost(st.score); // set the boost using the ScoreTerm's score termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query } @@ -348,45 +349,8 @@ public class FuzzyLikeThisQuery extends Query return termA.score < termB.score; } - } + } - //overrides basic TermQuery to negate effects of IDF (idf is factored into boost of containing BooleanQuery) - private static class FuzzyTermQuery extends TermQuery - { - boolean ignoreTF; - public FuzzyTermQuery(Term t, boolean ignoreTF) - { - super(t); - this.ignoreTF=ignoreTF; - } - @Override - public Similarity getSimilarity(IndexSearcher searcher) - { - Similarity result = super.getSimilarity(searcher); - result = new SimilarityDelegator(result) { - - @Override - public float tf(float freq) - { - if(ignoreTF) - { - return 1; //ignore tf - } - return super.tf(freq); - } - @Override - public float idf(int docFreq, int numDocs) - { - //IDF is already factored into individual term boosts - return 1; - } - }; - return result; - } - } - - - /* (non-Javadoc) * @see org.apache.lucene.search.Query#toString(java.lang.String) */ diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java index 5ab5834a5b4..f5c48c90fb2 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java @@ -23,6 +23,7 @@ import java.util.Set; import java.util.TreeSet; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Terms; @@ -57,7 +58,8 @@ public class TermsFilter extends Filter * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader) */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + IndexReader reader = context.reader; OpenBitSet result=new OpenBitSet(reader.maxDoc()); Fields fields = reader.fields(); BytesRef br = new BytesRef(); diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/package.html b/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/package.html index c963307fecb..7b54ddb557e 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/package.html +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/package.html @@ -1,3 +1,20 @@ + + Regular expression Query. diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java index d8d4af6e044..2b9b429c47a 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java @@ -92,20 +92,20 @@ import org.apache.lucene.util.PriorityQueue; * if you want pseudo code, the simplest possible usage is as follows. The bold * fragment is specific to this class. * - *
+ * 
  *
  * IndexReader ir = ...
  * IndexSearcher is = ...
- * 
+ * 
  * MoreLikeThis mlt = new MoreLikeThis(ir);
- * Reader target = ... // orig source of doc you want to find similarities to
+ * Reader target = ... // orig source of doc you want to find similarities to
  * Query query = mlt.like( target);
- * 
+ * 
  * Hits hits = is.search(query);
- * // now the usual iteration thru 'hits' - the only thing to watch for is to make sure
- * you ignore the doc if it matches your 'target' document, as it should be similar to itself 
+ * // now the usual iteration thru 'hits' - the only thing to watch for is to make sure
+ * //you ignore the doc if it matches your 'target' document, as it should be similar to itself
  *
- * 
+ *
* * Thus you: *
    diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java index a9a6766f927..b9a8dfacd6c 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Term; @@ -83,7 +84,7 @@ public class BooleanFilterTest extends LuceneTestCase { private void tstFilterCard(String mes, int expected, Filter filt) throws Throwable { - DocIdSetIterator disi = filt.getDocIdSet(reader).iterator(); + DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader)).iterator(); int actual = 0; while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { actual++; diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java index b2b16f8db96..1494dfeb0e2 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java @@ -72,7 +72,7 @@ public class ChainedFilterTest extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); // query for everything to make life easier BooleanQuery bq = new BooleanQuery(); @@ -194,7 +194,7 @@ public class ChainedFilterTest extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); Query query = new TermQuery(new Term("none", "none")); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java index a040d303fb8..29c7f0f2e37 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java @@ -20,16 +20,17 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.HashSet; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.MultiFields; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; public class DuplicateFilterTest extends LuceneTestCase { private static final String KEY_FIELD = "url"; @@ -42,7 +43,7 @@ public class DuplicateFilterTest extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new 
RandomIndexWriter(random, directory); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); //Add series of docs with filterable fields : url, text and dates flags addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101"); @@ -60,7 +61,7 @@ public class DuplicateFilterTest extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher =new IndexSearcher(reader); + searcher =newSearcher(reader); } diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java index 5f2bec5b04c..0f9b6ca7712 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java @@ -40,7 +40,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); //Add series of docs with misspelt names addDoc(writer, "jonathon smythe","1"); @@ -51,7 +51,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase { addDoc(writer, "johnathon smythe","6"); reader = writer.getReader(); writer.close(); - searcher=new IndexSearcher(reader); + searcher=newSearcher(reader); } @Override diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java index 454b6de9165..dfe6f8b8f6e 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java @@ -21,6 +21,7 @@ import java.util.HashSet; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -59,23 +60,26 @@ public class TermsFilterTest extends LuceneTestCase { w.addDocument(doc); } IndexReader reader = new SlowMultiReaderWrapper(w.getReader()); + assertTrue(reader.getTopReaderContext().isAtomic); + AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext(); + assertTrue(context.isAtomic); w.close(); TermsFilter tf=new TermsFilter(); tf.addTerm(new Term(fieldName,"19")); - OpenBitSet bits = (OpenBitSet)tf.getDocIdSet(reader); + OpenBitSet bits = (OpenBitSet)tf.getDocIdSet(context); assertEquals("Must match nothing", 0, bits.cardinality()); tf.addTerm(new Term(fieldName,"20")); - bits = (OpenBitSet)tf.getDocIdSet(reader); + bits = (OpenBitSet)tf.getDocIdSet(context); assertEquals("Must match 1", 1, bits.cardinality()); tf.addTerm(new Term(fieldName,"10")); - bits = (OpenBitSet)tf.getDocIdSet(reader); + bits = (OpenBitSet)tf.getDocIdSet(context); assertEquals("Must match 2", 2, bits.cardinality()); tf.addTerm(new Term(fieldName,"00")); - bits = (OpenBitSet)tf.getDocIdSet(reader); + bits = (OpenBitSet)tf.getDocIdSet(context); assertEquals("Must match 
2", 2, bits.cardinality()); reader.close(); diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java index 8fa1ba82469..1e7d1559d69 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java @@ -51,7 +51,7 @@ public class TestRegexQuery extends LuceneTestCase { writer.addDocument(doc); reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java index 09a45246be3..6de5e91ddc5 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java @@ -53,7 +53,7 @@ public class TestMoreLikeThis extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override diff --git a/lucene/contrib/queryparser/pom.xml.template b/lucene/contrib/queryparser/pom.xml.template deleted file mode 100644 index 96b5c23bfb2..00000000000 --- a/lucene/contrib/queryparser/pom.xml.template +++ /dev/null @@ -1,40 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-queryparser - Lucene Query Parser - @version@ - - This is the Flexible Query Parser for apache lucene java - - jar - - - diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/builders/QueryTreeBuilder.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/builders/QueryTreeBuilder.java index 9cae3ac0d68..fe20a8452bb 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/builders/QueryTreeBuilder.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/builders/QueryTreeBuilder.java @@ -61,7 +61,7 @@ public class QueryTreeBuilder implements QueryBuilder { private HashMap, QueryBuilder> queryNodeBuilders; - private HashMap fieldNameBuilders; + private HashMap fieldNameBuilders; /** * {@link QueryTreeBuilder} constructor. @@ -73,28 +73,25 @@ public class QueryTreeBuilder implements QueryBuilder { /** * Associates a field name with a builder. 
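Because getDocIdSet() now needs an atomic context, the TermsFilter test above first flattens its reader with SlowMultiReaderWrapper and hands the (atomic) top-level context to the filter. A hedged sketch of that test idiom; the directory, field and term are placeholders.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.OpenBitSet;

public final class TermsFilterContextSketch {
  public static long countMatches(Directory dir) throws Exception {
    IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(dir));
    // SlowMultiReaderWrapper presents the whole index as one segment, so its
    // top-level context is atomic and can be passed straight to the filter.
    AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext();

    TermsFilter tf = new TermsFilter();
    tf.addTerm(new Term("field", "value"));             // placeholder field/term
    OpenBitSet bits = (OpenBitSet) tf.getDocIdSet(context);
    long count = bits.cardinality();
    reader.close();
    return count;
  }
}
```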
* - * @param fieldName - * the field name - * @param builder - * the builder to be associated + * @param fieldName the field name + * @param builder the builder to be associated */ public void setBuilder(CharSequence fieldName, QueryBuilder builder) { if (this.fieldNameBuilders == null) { - this.fieldNameBuilders = new HashMap(); + this.fieldNameBuilders = new HashMap(); } - this.fieldNameBuilders.put(fieldName, builder); + this.fieldNameBuilders.put(fieldName.toString(), builder); + } /** * Associates a class with a builder * - * @param queryNodeClass - * the class - * @param builder - * the builder to be associated + * @param queryNodeClass the class + * @param builder the builder to be associated */ public void setBuilder(Class queryNodeClass, QueryBuilder builder) { @@ -135,8 +132,13 @@ public class QueryTreeBuilder implements QueryBuilder { QueryBuilder builder = null; if (this.fieldNameBuilders != null && node instanceof FieldableNode) { + CharSequence field = ((FieldableNode) node).getField(); - builder = this.fieldNameBuilders.get(((FieldableNode) node).getField()); + if (field != null) { + field = field.toString(); + } + + builder = this.fieldNameBuilders.get(field); } @@ -203,14 +205,13 @@ public class QueryTreeBuilder implements QueryBuilder { * Builds some kind of object from a query tree. Each node in the query tree * is built using an specific builder associated to it. * - * @param queryNode - * the query tree root node + * @param queryNode the query tree root node * * @return the built object * - * @throws QueryNodeException - * if some node builder throws a {@link QueryNodeException} or if - * there is a node which had no builder associated to it + * @throws QueryNodeException if some node builder throws a + * {@link QueryNodeException} or if there is a node which had no + * builder associated to it */ public Object build(QueryNode queryNode) throws QueryNodeException { process(queryNode); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/FieldConfig.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/FieldConfig.java index 1036a86950f..7c582972bc7 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/FieldConfig.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/FieldConfig.java @@ -28,17 +28,15 @@ import org.apache.lucene.util.AttributeSource; */ public class FieldConfig extends AttributeSource { - private CharSequence fieldName; - + private String fieldName; + /** * Constructs a {@link FieldConfig} * - * @param fieldName - * the field name, it cannot be null - * @throws IllegalArgumentException - * if the field name is null + * @param fieldName the field name, it cannot be null + * @throws IllegalArgumentException if the field name is null */ - public FieldConfig(CharSequence fieldName) { + public FieldConfig(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("field name should not be null!"); @@ -53,13 +51,14 @@ public class FieldConfig extends AttributeSource { * * @return the field name */ - public CharSequence getFieldName() { + public String getField() { return this.fieldName; } - + @Override - public String toString(){ - return ""; + public String toString() { + return ""; } } diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/QueryConfigHandler.java 
b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/QueryConfigHandler.java index ad22c198928..f50ff443eea 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/QueryConfigHandler.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/config/QueryConfigHandler.java @@ -46,7 +46,7 @@ import org.apache.lucene.util.AttributeSource; public abstract class QueryConfigHandler extends AttributeSource { private LinkedList listeners = new LinkedList(); - + /** * Returns an implementation of * {@link FieldConfig} for a specific field name. If the implemented @@ -60,7 +60,7 @@ public abstract class QueryConfigHandler extends AttributeSource { * configuration or null, if the implemented * {@link QueryConfigHandler} has no configuration for that field */ - public FieldConfig getFieldConfig(CharSequence fieldName) { + public FieldConfig getFieldConfig(String fieldName) { FieldConfig fieldConfig = new FieldConfig(fieldName); for (FieldConfigListener listener : this.listeners) { diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNode.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNode.java index f1afdaec59b..fac89f59f10 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNode.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNode.java @@ -43,14 +43,14 @@ public interface QueryNode extends Serializable { public boolean isLeaf(); /** verify if a node contains a tag */ - public boolean containsTag(CharSequence tagName); - + public boolean containsTag(String tagName); + /** * @param tagName * @return of stored on under that tag name */ - public Object getTag(CharSequence tagName); - + public Object getTag(String tagName); + public QueryNode getParent(); /** @@ -81,15 +81,20 @@ public interface QueryNode extends Serializable { * @param tagName * @param value */ - public void setTag(CharSequence tagName, Object value); - + public void setTag(String tagName, Object value); + /** * Unset a tag. tagName will be converted to lowercase. * * @param tagName */ - public void unsetTag(CharSequence tagName); - - public Map getTags(); + public void unsetTag(String tagName); + + /** + * Returns a map containing all tags attached to this query node. 
+ * + * @return a map containing all tags attached to this query node + */ + public Map getTagMap(); } diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java index e6316886838..6b48cabb869 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/nodes/QueryNodeImpl.java @@ -25,6 +25,7 @@ import java.util.ResourceBundle; import org.apache.lucene.messages.NLS; import org.apache.lucene.queryParser.core.messages.QueryParserMessages; +import org.apache.lucene.queryParser.core.util.StringUtils; /** * A {@link QueryNodeImpl} is the default implementation of the interface @@ -40,7 +41,7 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { private boolean isLeaf = true; - private Hashtable tags = new Hashtable(); + private Hashtable tags = new Hashtable(); private List clauses = null; @@ -117,7 +118,7 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { clone.isLeaf = this.isLeaf; // Reset all tags - clone.tags = new Hashtable(); + clone.tags = new Hashtable(); // copy children if (this.clauses != null) { @@ -151,19 +152,20 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { return this.clauses; } - public void setTag(CharSequence tagName, Object value) { - this.tags.put(tagName.toString().toLowerCase(), value); + public void setTag(String tagName, Object value) { + this.tags.put(tagName.toLowerCase(), value); } - public void unsetTag(CharSequence tagName) { - this.tags.remove(tagName.toString().toLowerCase()); + public void unsetTag(String tagName) { + this.tags.remove(tagName.toLowerCase()); } - public boolean containsTag(CharSequence tagName) { - return this.tags.containsKey(tagName.toString().toLowerCase()); + /** verify if a node contains a tag */ + public boolean containsTag(String tagName) { + return this.tags.containsKey(tagName); } - public Object getTag(CharSequence tagName) { + public Object getTag(String tagName) { return this.tags.get(tagName.toString().toLowerCase()); } @@ -189,16 +191,20 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { /** * This method is use toQueryString to detect if fld is the default field * - * @param fld - * - field name + * @param fld - field name * @return true if fld is the default field */ + // TODO: remove this method, it's commonly used by {@link + // #toQueryString(org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax)} + // to figure out what is the default field, however, {@link + // #toQueryString(org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax)} + // should receive the default field value directly by parameter protected boolean isDefaultField(CharSequence fld) { if (this.toQueryStringIgnoreFields) return true; if (fld == null) return true; - if (QueryNodeImpl.PLAINTEXT_FIELD_NAME.equals(fld.toString())) + if (QueryNodeImpl.PLAINTEXT_FIELD_NAME.equals(StringUtils.toString(fld))) return true; return false; } @@ -216,12 +222,13 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable { } /** - * @see org.apache.lucene.queryParser.core.nodes.QueryNode#getTag(CharSequence) - * @return a Map with all tags for this QueryNode + * Returns a map containing all tags attached to this query node. 
+ * + * @return a map containing all tags attached to this query node */ - @SuppressWarnings( { "unchecked" }) - public Map getTags() { - return (Map) this.tags.clone(); + @SuppressWarnings("unchecked") + public Map getTagMap() { + return (Map) this.tags.clone(); } } // end class QueryNodeImpl diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/util/StringUtils.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/util/StringUtils.java new file mode 100644 index 00000000000..fe0e51b9e8c --- /dev/null +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/core/util/StringUtils.java @@ -0,0 +1,33 @@ +package org.apache.lucene.queryParser.core.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +final public class StringUtils { + + public static String toString(Object obj) { + + if (obj != null) { + return obj.toString(); + + } else { + return null; + } + + } + +} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java index 312728690f2..b2e898dc870 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java @@ -31,7 +31,7 @@ import org.apache.lucene.queryParser.standard.parser.EscapeQuerySyntaxImpl; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.BooleanQuery.TooManyClauses; /** @@ -41,7 +41,7 @@ import org.apache.lucene.search.BooleanQuery.TooManyClauses; * * @see BooleanQueryNodeBuilder * @see BooleanQuery - * @see Similarity#coord(int, int) + * @see SimilarityProvider#coord(int, int) */ public class StandardBooleanQueryNodeBuilder implements StandardQueryBuilder { diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldBoostMapFCListener.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldBoostMapFCListener.java index 5e8399e82e9..ab93ee9d531 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldBoostMapFCListener.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldBoostMapFCListener.java @@ -47,7 +47,7 @@ public class FieldBoostMapFCListener implements FieldConfigListener { 
FieldBoostMapAttribute fieldBoostMapAttr = this.config.getAttribute(FieldBoostMapAttribute.class); BoostAttribute boostAttr = fieldConfig.addAttribute(BoostAttribute.class); - Float boost = fieldBoostMapAttr.getFieldBoostMap().get(fieldConfig.getFieldName()); + Float boost = fieldBoostMapAttr.getFieldBoostMap().get(fieldConfig.getField()); if (boost != null) { boostAttr.setBoost(boost.floatValue()); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldDateResolutionFCListener.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldDateResolutionFCListener.java index 7d6b66c8510..0bdb9abc8ac 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldDateResolutionFCListener.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/FieldDateResolutionFCListener.java @@ -53,7 +53,7 @@ public class FieldDateResolutionFCListener implements FieldConfigListener { FieldDateResolutionMapAttribute dateResMapAttr = this.config .addAttribute(FieldDateResolutionMapAttribute.class); dateRes = dateResMapAttr.getFieldDateResolutionMap().get( - fieldConfig.getFieldName().toString()); + fieldConfig.getField()); } if (dateRes == null) { diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/MultiTermRewriteMethodAttribute.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/MultiTermRewriteMethodAttribute.java index 84924e34599..bb0559427df 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/MultiTermRewriteMethodAttribute.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/MultiTermRewriteMethodAttribute.java @@ -32,7 +32,7 @@ import org.apache.lucene.util.Attribute; */ public interface MultiTermRewriteMethodAttribute extends Attribute { - public static final CharSequence TAG_ID = "MultiTermRewriteMethodAttribute"; + public static final String TAG_ID = "MultiTermRewriteMethodAttribute"; public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java index 3ddd6396d1e..d09d234a04d 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java @@ -22,14 +22,14 @@ import java.util.List; import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; /** * A {@link StandardBooleanQueryNode} has the same behavior as * {@link BooleanQueryNode}. It only indicates if the coord should be enabled or * not for this boolean query.
    * - * @see Similarity#coord(int, int) + * @see SimilarityProvider#coord(int, int) * @see BooleanQuery */ public class StandardBooleanQueryNode extends BooleanQueryNode { diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/BoostQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/BoostQueryNodeProcessor.java index 57f0cc59f75..5232ffa0184 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/BoostQueryNodeProcessor.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/BoostQueryNodeProcessor.java @@ -26,6 +26,7 @@ import org.apache.lucene.queryParser.core.nodes.BoostQueryNode; import org.apache.lucene.queryParser.core.nodes.FieldableNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; +import org.apache.lucene.queryParser.core.util.StringUtils; import org.apache.lucene.queryParser.standard.config.BoostAttribute; /** @@ -49,7 +50,8 @@ public class BoostQueryNodeProcessor extends QueryNodeProcessorImpl { QueryConfigHandler config = getQueryConfigHandler(); if (config != null) { - FieldConfig fieldConfig = config.getFieldConfig(fieldNode.getField()); + CharSequence field = fieldNode.getField(); + FieldConfig fieldConfig = config.getFieldConfig(StringUtils.toString(field)); if (fieldConfig != null && fieldConfig.hasAttribute(BoostAttribute.class)) { BoostAttribute boostAttr = fieldConfig.getAttribute(BoostAttribute.class); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java index 0947475d637..fc2e5fa020b 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java @@ -97,8 +97,15 @@ public class ParametricRangeQueryNodeProcessor extends QueryNodeProcessorImpl { } - FieldConfig fieldConfig = getQueryConfigHandler().getFieldConfig( - parametricRangeNode.getField()); + CharSequence field = parametricRangeNode.getField(); + String fieldStr = null; + + if (field != null) { + fieldStr = field.toString(); + } + + FieldConfig fieldConfig = getQueryConfigHandler() + .getFieldConfig(fieldStr); if (fieldConfig != null) { diff --git a/lucene/contrib/queryparser/src/java/overview.html b/lucene/contrib/queryparser/src/java/overview.html index 4b89bf053c7..d3df6f0722c 100644 --- a/lucene/contrib/queryparser/src/java/overview.html +++ b/lucene/contrib/queryparser/src/java/overview.html @@ -138,11 +138,6 @@ you don't need to worry about dealing with those. config.setAnalyzer(new WhitespaceAnalyzer()); Query query = qpHelper.parse("apache AND lucene", "defaultField");
-

-To make it easy for people who are using current Lucene's query parser to switch to
-the new one, there is a {@link org.apache.lucene.queryParser.standard.QueryParserWrapper} under org.apache.lucene.queryParser.standard
-that keeps the old query parser interface, but uses the new query parser infrastructure.
-

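With the QueryParserWrapper pointer dropped from the overview above, callers are left with the new contrib parser itself. For reference, a minimal hedged sketch of that direct usage, assuming only the StandardQueryParser API exercised by the overview snippet and by TestQPHelper later in this patch; the wrapper class and method names below are invented for illustration:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.standard.StandardQueryParser;
import org.apache.lucene.search.Query;

public final class StandardParserExample {
  // Parses the given syntax against a default field, mirroring the
  // qpHelper.parse("apache AND lucene", "defaultField") call shown in overview.html.
  public static Query parse(Analyzer analyzer, String syntax, String defaultField)
      throws QueryNodeException {
    StandardQueryParser qpHelper = new StandardQueryParser(analyzer);
    return qpHelper.parse(syntax, defaultField);
  }
}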
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/builders/TestQueryTreeBuilder.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/builders/TestQueryTreeBuilder.java new file mode 100644 index 00000000000..88ad9a21b16 --- /dev/null +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/core/builders/TestQueryTreeBuilder.java @@ -0,0 +1,48 @@ +package org.apache.lucene.queryParser.core.builders; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import junit.framework.Assert; + +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.nodes.FieldQueryNode; +import org.apache.lucene.queryParser.core.nodes.QueryNode; +import org.apache.lucene.queryParser.core.util.UnescapedCharSequence; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class TestQueryTreeBuilder extends LuceneTestCase { + + @Test + public void testSetFieldBuilder() throws QueryNodeException { + QueryTreeBuilder qtb = new QueryTreeBuilder(); + qtb.setBuilder("field", new DummyBuilder()); + Object result = qtb.build(new FieldQueryNode(new UnescapedCharSequence("field"), "foo", 0, 0)); + Assert.assertEquals("OK", result); + + } + + private static class DummyBuilder implements QueryBuilder { + + public Object build(QueryNode queryNode) throws QueryNodeException { + return "OK"; + } + + } + +} diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java index cf0c8876064..5cba05b3111 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java @@ -84,6 +84,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + @Override public boolean incrementToken() throws IOException { if (inPhrase) { inPhrase = false; @@ -108,6 +109,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public static final class QPTestAnalyzer extends Analyzer { /** Filters MockTokenizer with StopFilter. 
*/ + @Override public final TokenStream tokenStream(String fieldName, Reader reader) { return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); } @@ -115,6 +117,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { private int originalMaxClauses; + @Override public void setUp() throws Exception { super.setUp(); originalMaxClauses = BooleanQuery.getMaxClauseCount(); @@ -567,6 +570,12 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { // too many boolean clauses, so ParseException is expected } } + + // LUCENE-792 + public void testNOT() throws Exception { + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); + assertQueryEquals("NOT foo AND bar", a, "-foo +bar"); + } /** * This test differs from the original QueryParser, showing how the precedence @@ -621,6 +630,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { } + @Override public void tearDown() { BooleanQuery.setMaxClauseCount(originalMaxClauses); } diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/spans/SpansQueryConfigHandler.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/spans/SpansQueryConfigHandler.java index 520d4efeb73..b614938f9ee 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/spans/SpansQueryConfigHandler.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/spans/SpansQueryConfigHandler.java @@ -33,7 +33,7 @@ public class SpansQueryConfigHandler extends QueryConfigHandler { } @Override - public FieldConfig getFieldConfig(CharSequence fieldName) { + public FieldConfig getFieldConfig(String fieldName) { // there is no field configuration, always return null return null; diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java index 4883f6bef05..2c8b4b61ccf 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java @@ -1277,12 +1277,13 @@ public class TestQPHelper extends LuceneTestCase { Document doc = new Document(); doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED)); w.addDocument(doc); - IndexReader r = IndexReader.open(w); - IndexSearcher s = new IndexSearcher(r); + IndexReader r = IndexReader.open(w, true); + IndexSearcher s = newSearcher(r); Query q = new StandardQueryParser(new CannedAnalyzer()).parse("\"a\"", "field"); assertTrue(q instanceof MultiPhraseQuery); assertEquals(1, s.search(q, 10).totalHits); + s.close(); r.close(); w.close(); dir.close(); diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java new file mode 100644 index 00000000000..50275eec8c8 --- /dev/null +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java @@ -0,0 +1,67 @@ +package org.apache.lucene.queryParser.standard.config; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; + +public class TestAttributes extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new AllowLeadingWildcardAttributeImpl(), + Collections.singletonMap(AllowLeadingWildcardAttribute.class.getName()+"#allowLeadingWildcard", false)); + _TestUtil.assertAttributeReflection(new AnalyzerAttributeImpl(), + Collections.singletonMap(AnalyzerAttribute.class.getName()+"#analyzer", null)); + _TestUtil.assertAttributeReflection(new BoostAttributeImpl(), + Collections.singletonMap(BoostAttribute.class.getName()+"#boost", 1.0f)); + _TestUtil.assertAttributeReflection(new DateResolutionAttributeImpl(), + Collections.singletonMap(DateResolutionAttribute.class.getName()+"#dateResolution", null)); + _TestUtil.assertAttributeReflection(new DefaultOperatorAttributeImpl(), + Collections.singletonMap(DefaultOperatorAttribute.class.getName()+"#operator", DefaultOperatorAttribute.Operator.OR)); + _TestUtil.assertAttributeReflection(new DefaultPhraseSlopAttributeImpl(), + Collections.singletonMap(DefaultPhraseSlopAttribute.class.getName()+"#defaultPhraseSlop", 0)); + _TestUtil.assertAttributeReflection(new FieldBoostMapAttributeImpl(), + Collections.singletonMap(FieldBoostMapAttribute.class.getName()+"#boosts", Collections.emptyMap())); + _TestUtil.assertAttributeReflection(new FieldDateResolutionMapAttributeImpl(), + Collections.singletonMap(FieldDateResolutionMapAttribute.class.getName()+"#dateRes", Collections.emptyMap())); + _TestUtil.assertAttributeReflection(new FuzzyAttributeImpl(), new HashMap() {{ + put(FuzzyAttribute.class.getName()+"#prefixLength", FuzzyQuery.defaultPrefixLength); + put(FuzzyAttribute.class.getName()+"#minSimilarity", FuzzyQuery.defaultMinSimilarity); + }}); + _TestUtil.assertAttributeReflection(new LocaleAttributeImpl(), + Collections.singletonMap(LocaleAttribute.class.getName()+"#locale", Locale.getDefault())); + _TestUtil.assertAttributeReflection(new LowercaseExpandedTermsAttributeImpl(), + Collections.singletonMap(LowercaseExpandedTermsAttribute.class.getName()+"#lowercaseExpandedTerms", true)); + _TestUtil.assertAttributeReflection(new MultiFieldAttributeImpl(), + Collections.singletonMap(MultiFieldAttribute.class.getName()+"#fields", null)); + _TestUtil.assertAttributeReflection(new MultiTermRewriteMethodAttributeImpl(), + Collections.singletonMap(MultiTermRewriteMethodAttribute.class.getName()+"#multiTermRewriteMethod", MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT)); + _TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(), + 
Collections.singletonMap(PositionIncrementsAttribute.class.getName()+"#positionIncrementsEnabled", false)); + _TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(), + Collections.singletonMap(RangeCollatorAttribute.class.getName()+"#rangeCollator", null)); + } + +} diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/BooleanQueryTst.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/BooleanQueryTst.java index c89127cde9d..bba9321f3b4 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/BooleanQueryTst.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/surround/query/BooleanQueryTst.java @@ -19,7 +19,7 @@ package org.apache.lucene.queryParser.surround.query; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; @@ -77,8 +77,8 @@ public class BooleanQueryTst { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) throws IOException { + docBase = context.docBase; } @Override diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java index 69431f71f26..2751dbc9e34 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java @@ -19,7 +19,7 @@ package org.apache.lucene.spatial.geohash; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.Filter; @@ -62,15 +62,15 @@ public class GeoHashDistanceFilter extends DistanceFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(reader, geoHashField); + final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(context.reader, geoHashField); final BytesRef br = new BytesRef(); final int docBase = nextDocBase; - nextDocBase += reader.maxDoc(); + nextDocBase += context.reader.maxDoc(); - return new FilteredDocIdSet(startingFilter.getDocIdSet(reader)) { + return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) { @Override public boolean match(int doc) { diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashUtils.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashUtils.java index be7c1433d54..5ace9adc87e 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashUtils.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashUtils.java @@ -22,7 +22,7 @@ import java.util.Map; /** * Utilities for encoding and decoding geohashes. Based on - * http://en.wikipedia.org/wiki/Geohash. + * http://en.wikipedia.org/wiki/Geohash. 
*/ public class GeoHashUtils { diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java index 11527f396d4..6ee8fbeb771 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -45,8 +45,8 @@ public class CartesianShapeFilter extends Filter { } @Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - final Bits delDocs = reader.getDeletedDocs(); + public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException { + final Bits delDocs = context.reader.getDeletedDocs(); final List area = shape.getArea(); final int sz = area.size(); @@ -58,7 +58,7 @@ public class CartesianShapeFilter extends Filter { return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return reader.termDocsEnum(delDocs, fieldName, bytesRef); + return context.reader.termDocsEnum(delDocs, fieldName, bytesRef); } @Override @@ -67,11 +67,11 @@ public class CartesianShapeFilter extends Filter { } }; } else { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); + final OpenBitSet bits = new OpenBitSet(context.reader.maxDoc()); for (int i =0; i< sz; i++) { double boxId = area.get(i).doubleValue(); NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); - final DocsEnum docsEnum = reader.termDocsEnum(delDocs, fieldName, bytesRef); + final DocsEnum docsEnum = context.reader.termDocsEnum(delDocs, fieldName, bytesRef); if (docsEnum == null) continue; // iterate through all documents // which have this boxId diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java index 262916a9c68..dec1f88b14c 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java @@ -19,7 +19,7 @@ package org.apache.lucene.spatial.tier; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; @@ -108,16 +108,14 @@ public class DistanceFieldComparatorSource extends FieldComparatorSource { } - @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) - throws IOException { - - // each reader in a segmented base - // has an offset based on the maxDocs of previous readers - offset = docBase; - - return this; - } + @Override + public FieldComparator setNextReader(AtomicReaderContext context) + throws IOException { + // each reader in a segmented base + // has an offset based on the maxDocs of previous readers + offset = context.docBase; + return this; + } @Override public 
Comparable value(int slot) { diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java index 44fba384701..94c3bd86ba0 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java @@ -18,7 +18,8 @@ package org.apache.lucene.spatial.tier; import java.io.IOException; -import org.apache.lucene.index.IndexReader; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FilteredDocIdSet; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Filter; @@ -64,15 +65,15 @@ public class LatLongDistanceFilter extends DistanceFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - final double[] latIndex = FieldCache.DEFAULT.getDoubles(reader, latField); - final double[] lngIndex = FieldCache.DEFAULT.getDoubles(reader, lngField); + final double[] latIndex = FieldCache.DEFAULT.getDoubles(context.reader, latField); + final double[] lngIndex = FieldCache.DEFAULT.getDoubles(context.reader, lngField); final int docBase = nextDocBase; - nextDocBase += reader.maxDoc(); + nextDocBase += context.reader.maxDoc(); - return new FilteredDocIdSet(startingFilter.getDocIdSet(reader)) { + return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) { @Override protected boolean match(int doc) { double x = latIndex[doc]; diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/geometry/TestDistanceUnits.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/geometry/TestDistanceUnits.java index 509e7009799..f1758859cf9 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/geometry/TestDistanceUnits.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/geometry/TestDistanceUnits.java @@ -1,5 +1,22 @@ package org.apache.lucene.spatial.geometry; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java index 0e0a787f8fe..7aaa919a335 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java @@ -22,14 +22,17 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.store.Directory; + public class TestDistance extends LuceneTestCase { private Directory directory; @@ -96,13 +99,13 @@ public class TestDistance extends LuceneTestCase { public void testLatLongFilterOnDeletedDocs() throws Exception { writer.deleteDocuments(new Term("name", "Potomac")); - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); LatLongDistanceFilter f = new LatLongDistanceFilter(new QueryWrapperFilter(new MatchAllDocsQuery()), lat, lng, 1.0, latField, lngField); - IndexReader[] readers = r.getSequentialSubReaders(); - for(int i=0;i - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-spellchecker - Lucene Spellchecker - @version@ - Spell Checker - jar - diff --git a/lucene/contrib/swing/pom.xml.template b/lucene/contrib/swing/pom.xml.template deleted file mode 100644 index 2ebf3bc9d03..00000000000 --- a/lucene/contrib/swing/pom.xml.template +++ /dev/null @@ -1,36 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-swing - Lucene Swing - @version@ - Swing Models - jar - diff --git a/lucene/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java b/lucene/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java index 611b063baef..e8d2b7765c0 100644 --- a/lucene/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java +++ b/lucene/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java @@ -32,6 +32,7 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; @@ -192,7 +193,7 @@ public class ListSearcher extends AbstractListModel { } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; diff --git a/lucene/contrib/wordnet/pom.xml.template b/lucene/contrib/wordnet/pom.xml.template deleted file mode 100644 index e9518229364..00000000000 --- a/lucene/contrib/wordnet/pom.xml.template +++ /dev/null @@ -1,37 +0,0 @@ - - - - - 4.0.0 - - org.apache.lucene - 
lucene-contrib - @version@ - - org.apache.lucene - lucene-wordnet - Lucene Wordnet - @version@ - WordNet - jar - - diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java index 908cfd66eff..646abf73dbd 100755 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; @@ -160,9 +161,9 @@ public final class SynExpand { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { - this.reader = reader; + this.reader = context.reader; } @Override diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java index 066df71ba02..4cc4836cc5b 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; @@ -59,7 +60,7 @@ public class SynLookup { } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; @@ -169,9 +170,9 @@ public class SynLookup { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { - this.reader = reader; + this.reader = context.reader; } @Override diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java index 455c8118c5a..099d653bef1 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java @@ -52,15 +52,17 @@ import java.util.TreeSet; * high-frequency lookups of medium size synonym tables. *

  * Example Usage:
- *
+ * 
  * String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};
  * SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));
  * for (int i = 0; i < words.length; i++) {
  *     String[] synonyms = map.getSynonyms(words[i]);
  *     System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());
  * }
- * 
+ * 
+ *
  * Example output:
+ *
  * hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]
  * woods:[forest, wood]
  * forest:[afforest, timber, timberland, wood, woodland, woods]
@@ -161,7 +163,7 @@ public class SynonymMap {
     return word.toLowerCase();
   }
 
-  private static boolean isValid(String str) {
+  protected boolean isValid(String str) {
     for (int i=str.length(); --i >= 0; ) {
       if (!Character.isLetter(str.charAt(i))) return false;
     }
@@ -395,4 +397,4 @@ public class SynonymMap {
     }
   }
   
-}
\ No newline at end of file
+}
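Because isValid is widened above from private static to protected, a subclass can in principle relax which words the map accepts. A rough sketch under the assumption that SynonymMap exposes an InputStream constructor as in its javadoc example; the subclass name and the letter-or-digit rule are invented for illustration:

import java.io.IOException;
import java.io.InputStream;

import org.apache.lucene.wordnet.SynonymMap;

public class DigitFriendlySynonymMap extends SynonymMap {

  public DigitFriendlySynonymMap(InputStream input) throws IOException {
    super(input);
  }

  @Override
  protected boolean isValid(String str) {
    // accept letters and digits instead of letters only
    for (int i = str.length(); --i >= 0; ) {
      if (!Character.isLetterOrDigit(str.charAt(i))) return false;
    }
    return true;
  }
}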
diff --git a/lucene/contrib/xml-query-parser/pom.xml.template b/lucene/contrib/xml-query-parser/pom.xml.template
deleted file mode 100644
index 4c1e7f8a5a5..00000000000
--- a/lucene/contrib/xml-query-parser/pom.xml.template
+++ /dev/null
@@ -1,43 +0,0 @@
-
-
-  
-
-  4.0.0
-  
-    org.apache.lucene
-    lucene-contrib
-    @version@
-  
-  org.apache.lucene
-  lucene-xml-query-parser
-  Lucene XML Query Parser
-  @version@
-  XML query parser
-  jar
-  
-    
-      org.apache.lucene
-      lucene-queries
-      @version@
-    
-  
-
diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java
index c834f8e4d98..ea5f5741c34 100644
--- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java
@@ -19,7 +19,7 @@ package org.apache.lucene.xmlparser.builders;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.NumericRangeFilter;
@@ -157,7 +157,7 @@ public class NumericRangeFilterBuilder implements FilterBuilder {
 		private static final long serialVersionUID = 1L;
 
 		@Override
-		public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+		public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
 			return null;
 		}
 
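The anonymous no-match filter above now follows the per-segment contract this patch migrates to (compare the spatial filters earlier). A minimal sketch of a custom Filter against the new getDocIdSet(AtomicReaderContext) signature, using only the DocIdSet/OpenBitSet APIs already seen in this patch; the filter below simply matches every document in a segment, ignores deletions, and exists only for illustration:

import java.io.IOException;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;

public class MatchAllSegmentFilter extends Filter {

  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
    // one bit set per segment; context.reader is that segment's reader
    OpenBitSet bits = new OpenBitSet(context.reader.maxDoc());
    bits.set(0, context.reader.maxDoc()); // deletions ignored for brevity
    return bits;
  }
}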
diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
index ad266b52f19..6122b8aab6a 100644
--- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
+++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
@@ -73,7 +73,7 @@ public class TestParser extends LuceneTestCase {
 			d.close();
       writer.close();
 		reader=IndexReader.open(dir, true);
-		searcher=new IndexSearcher(reader);
+		searcher=newSearcher(reader);
 		
 	}
 	
@@ -215,7 +215,10 @@ public class TestParser extends LuceneTestCase {
 	}
 	private void dumpResults(String qType,Query q, int numDocs) throws IOException
 	{
-		TopDocs hits = searcher.search(q, null, numDocs);
+                if (VERBOSE) {
+                  System.out.println("TEST: query=" + q);
+                }
+                TopDocs hits = searcher.search(q, null, numDocs);
 		assertTrue(qType +" should produce results ", hits.totalHits>0);
 		if(VERBOSE)
 		{
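This hunk, like TestRegexQuery and TestMoreLikeThis earlier in the patch, swaps new IndexSearcher(reader) for the LuceneTestCase newSearcher(reader) helper. A hedged sketch of that setUp/tearDown pattern, assuming the newDirectory(), random, and RandomIndexWriter members commonly available on LuceneTestCase of this vintage; the test class and its document contents are placeholders:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestNewSearcherPattern extends LuceneTestCase {
  private Directory dir;
  private IndexReader reader;
  private IndexSearcher searcher;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    writer.addDocument(new Document());
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader); // instead of new IndexSearcher(reader)
  }

  public void testSearcherIsUsable() throws Exception {
    assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits);
  }

  @Override
  public void tearDown() throws Exception {
    searcher.close();
    reader.close();
    dir.close();
    super.tearDown();
  }
}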
diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java
index dca574dd0bf..028cc752b85 100644
--- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java
@@ -28,7 +28,9 @@ import javax.xml.parsers.ParserConfigurationException;
 import org.apache.lucene.util.LuceneTestCase;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.NumericRangeFilter;
 import org.apache.lucene.store.Directory;
@@ -64,10 +66,10 @@ public class TestNumericRangeFilterBuilder extends LuceneTestCase {
 		writer.commit();
 		try
 		{
-			IndexReader reader = IndexReader.open(ramDir, true);
+			IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(ramDir, true));
 			try
 			{
-				assertNull(filter.getDocIdSet(reader));
+				assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext()));
 			}
 			finally
 			{
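The test above obtains an atomic context for a whole index by wrapping the reader in SlowMultiReaderWrapper and casting getTopReaderContext(). A small sketch of that pattern factored into a helper, using only the classes the test itself imports; the helper name and the null-means-no-matches check mirror the assertNull call above and are illustrative only:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.search.Filter;
import org.apache.lucene.store.Directory;

public final class AtomicContextFilterCheck {
  // Returns true if the filter produces no DocIdSet over the whole index,
  // presenting the (possibly multi-segment) reader as one atomic context.
  public static boolean producesNoMatches(Filter filter, Directory dir) throws IOException {
    IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(dir, true));
    try {
      return filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext()) == null;
    } finally {
      reader.close();
    }
  }
}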
diff --git a/lucene/lucene-contrib-pom.xml.template b/lucene/lucene-contrib-pom.xml.template
deleted file mode 100644
index 9c33fb836c1..00000000000
--- a/lucene/lucene-contrib-pom.xml.template
+++ /dev/null
@@ -1,50 +0,0 @@
-
-
-  
-
-  4.0.0
-  
-    org.apache.lucene
-    lucene-parent
-    @version@
-  
-  lucene-contrib
-  Lucene Java Contrib POM
-  @version@
-  pom
-  
-    
-      org.apache.lucene
-      lucene-core
-      @version@
-    
-  
-  
-    1.0.4
-    1.7
-    3.1
-    1.7.0
-    1.4
-    3.3.93
-    4.0
-  
-
diff --git a/lucene/lucene-core-pom.xml.template b/lucene/lucene-core-pom.xml.template
deleted file mode 100644
index da3a14e32a5..00000000000
--- a/lucene/lucene-core-pom.xml.template
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-  
-
-  
-    org.apache.lucene
-    lucene-parent
-    @version@
-  
-  4.0.0
-  org.apache.lucene
-  lucene-core
-  Lucene Core
-  @version@
-  Apache Lucene Java Core
-  jar
-
diff --git a/lucene/lucene-parent-pom.xml.template b/lucene/lucene-parent-pom.xml.template
deleted file mode 100644
index 3f01c87cd3c..00000000000
--- a/lucene/lucene-parent-pom.xml.template
+++ /dev/null
@@ -1,89 +0,0 @@
-
-
-  
-  
-    org.apache
-    apache
-    4
-  
-  4.0.0
-  org.apache.lucene
-  lucene-parent
-  Lucene Java POM
-  @version@
-  Apache Lucene Java POM
-  http://lucene.apache.org/java
-  pom
-  
-    JIRA
-    http://issues.apache.org/jira/browse/LUCENE
-  
-  
-    Hudson
-    
-      http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/
-    
-  
-  
-    
-      Java User List
-      java-user-subscribe@lucene.apache.org
-      java-user-unsubscribe@lucene.apache.org
-      
-        http://mail-archives.apache.org/mod_mbox/java-user/
-      
-    
-    
-      Java Developer List
-      java-dev-subscribe@lucene.apache.org
-      java-dev-unsubscribe@lucene.apache.org
-      
-        http://mail-archives.apache.org/mod_mbox/java-dev/
-      
-    
-    
-      Java Commits List
-      java-commits-subscribe@lucene.apache.org
-      
-        java-commits-unsubscribe@lucene.apache.org
-      
-      
-        http://mail-archives.apache.org/mod_mbox/java-commits/
-      
-    
-  
-  2000
-  
-    
-      Apache 2
-      http://www.apache.org/licenses/LICENSE-2.0.txt
-    
-  
-  
-    
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev
-    
-    
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev
-    
-  
-
diff --git a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
index bed4c06c1a6..b98a24646ca 100644
--- a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
+++ b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
 
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.document.NumericField; // for javadocs
@@ -83,8 +84,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  * href="../search/NumericRangeQuery.html#precisionStepDesc">precisionStep
  * parameter as well as how numeric fields work under the hood.

* - * @lucene.experimental - * * @since 2.9 */ public final class NumericTokenStream extends TokenStream { @@ -95,22 +94,34 @@ public final class NumericTokenStream extends TokenStream { /** The lower precision tokens gets this token type assigned. */ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; - /** Expert: Use this attribute to get the details of the currently generated token + /** Expert: Use this attribute to get the details of the currently generated token. * @lucene.experimental * @since 4.0 */ public interface NumericTermAttribute extends Attribute { /** Returns current shift value, undefined before first token */ int getShift(); - /** Returns {@link NumericTokenStream}'s raw value as {@code long} */ + /** Returns current token's raw value as {@code long} with all {@link #getShift} applied, undefined before first token */ long getRawValue(); /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */ int getValueSize(); + + /** Don't call this method! + * @lucene.internal */ + void init(long value, int valSize, int precisionStep, int shift); + + /** Don't call this method! + * @lucene.internal */ + void setShift(int shift); + + /** Don't call this method! + * @lucene.internal */ + int incShift(); } + // just a wrapper to prevent adding CTA private static final class NumericAttributeFactory extends AttributeFactory { private final AttributeFactory delegate; - private NumericTokenStream ts = null; NumericAttributeFactory(AttributeFactory delegate) { this.delegate = delegate; @@ -118,72 +129,79 @@ public final class NumericTokenStream extends TokenStream { @Override public AttributeImpl createAttributeInstance(Class attClass) { - if (attClass == NumericTermAttribute.class) - return new NumericTermAttributeImpl(ts); if (CharTermAttribute.class.isAssignableFrom(attClass)) throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute."); return delegate.createAttributeInstance(attClass); } } - private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { - private final NumericTokenStream ts; + /** Implementation of {@link NumericTermAttribute}. + * @lucene.internal + * @since 4.0 + */ + public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { + private long value = 0L; + private int valueSize = 0, shift = 0, precisionStep = 0; - public NumericTermAttributeImpl(NumericTokenStream ts) { - this.ts = ts; - } - public int toBytesRef(BytesRef bytes) { try { - assert ts.valSize == 64 || ts.valSize == 32; - return (ts.valSize == 64) ? - NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) : - NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes); + assert valueSize == 64 || valueSize == 32; + return (valueSize == 64) ? 
+ NumericUtils.longToPrefixCoded(value, shift, bytes) : + NumericUtils.intToPrefixCoded((int) value, shift, bytes); } catch (IllegalArgumentException iae) { - // return empty token before first + // return empty token before first or after last bytes.length = 0; return 0; } } - public int getShift() { return ts.shift; } - public long getRawValue() { return ts.value; } - public int getValueSize() { return ts.valSize; } + public int getShift() { return shift; } + public void setShift(int shift) { this.shift = shift; } + public int incShift() { + return (shift += precisionStep); + } + + public long getRawValue() { return value & ~((1L << shift) - 1L); } + public int getValueSize() { return valueSize; } + + public void init(long value, int valueSize, int precisionStep, int shift) { + this.value = value; + this.valueSize = valueSize; + this.precisionStep = precisionStep; + this.shift = shift; + } @Override public void clear() { - // this attribute has no contents to clear - } - - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); + // this attribute has no contents to clear! + // we keep it untouched as it's fully controlled by outer class. } + @Override + public void reflectWith(AttributeReflector reflector) { + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + reflector.reflect(NumericTermAttribute.class, "shift", shift); + reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue()); + reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize); + } + @Override public void copyTo(AttributeImpl target) { - // this attribute has no contents to copy - } - - @Override - public Object clone() { - // cannot throw CloneNotSupportedException (checked) - throw new UnsupportedOperationException(); + final NumericTermAttribute a = (NumericTermAttribute) target; + a.init(value, valueSize, precisionStep, shift); } } - + /** * Creates a token stream for numeric values using the default precisionStep * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, * before using set a value using the various set???Value() methods. */ public NumericTokenStream() { - this(NumericUtils.PRECISION_STEP_DEFAULT); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, NumericUtils.PRECISION_STEP_DEFAULT); } /** @@ -192,15 +210,7 @@ public final class NumericTokenStream extends TokenStream { * before using set a value using the various set???Value() methods. 
*/ public NumericTokenStream(final int precisionStep) { - super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); - // we must do this after the super call :( - ((NumericAttributeFactory) getAttributeFactory()).ts = this; - addAttribute(NumericTermAttribute.class); - - this.precisionStep = precisionStep; - if (precisionStep < 1) - throw new IllegalArgumentException("precisionStep must be >=1"); - shift = -precisionStep; + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep); } /** @@ -212,14 +222,10 @@ public final class NumericTokenStream extends TokenStream { */ public NumericTokenStream(AttributeFactory factory, final int precisionStep) { super(new NumericAttributeFactory(factory)); - // we must do this after the super call :( - ((NumericAttributeFactory) getAttributeFactory()).ts = this; - addAttribute(NumericTermAttribute.class); - - this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); - shift = -precisionStep; + this.precisionStep = precisionStep; + numericAtt.setShift(-precisionStep); } /** @@ -229,9 +235,7 @@ public final class NumericTokenStream extends TokenStream { * new Field(name, new NumericTokenStream(precisionStep).setLongValue(value)) */ public NumericTokenStream setLongValue(final long value) { - this.value = value; - valSize = 64; - shift = -precisionStep; + numericAtt.init(value, valSize = 64, precisionStep, -precisionStep); return this; } @@ -242,9 +246,7 @@ public final class NumericTokenStream extends TokenStream { * new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)) */ public NumericTokenStream setIntValue(final int value) { - this.value = value; - valSize = 32; - shift = -precisionStep; + numericAtt.init(value, valSize = 32, precisionStep, -precisionStep); return this; } @@ -255,9 +257,7 @@ public final class NumericTokenStream extends TokenStream { * new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value)) */ public NumericTokenStream setDoubleValue(final double value) { - this.value = NumericUtils.doubleToSortableLong(value); - valSize = 64; - shift = -precisionStep; + numericAtt.init(NumericUtils.doubleToSortableLong(value), valSize = 64, precisionStep, -precisionStep); return this; } @@ -268,9 +268,7 @@ public final class NumericTokenStream extends TokenStream { * new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value)) */ public NumericTokenStream setFloatValue(final float value) { - this.value = NumericUtils.floatToSortableInt(value); - valSize = 32; - shift = -precisionStep; + numericAtt.init(NumericUtils.floatToSortableInt(value), valSize = 32, precisionStep, -precisionStep); return this; } @@ -278,40 +276,28 @@ public final class NumericTokenStream extends TokenStream { public void reset() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - shift = -precisionStep; + numericAtt.setShift(-precisionStep); } @Override public boolean incrementToken() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - shift += precisionStep; - if (shift >= valSize) { - // reset so the attribute still works after exhausted stream - shift -= precisionStep; - return false; - } - + + // this will only clear all other attributes in this TokenStream clearAttributes(); - // the TermToBytesRefAttribute is directly accessing shift & value. + + final int shift = numericAtt.incShift(); typeAtt.setType((shift == 0) ? 
TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0); - return true; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize); - sb.append(",precisionStep=").append(precisionStep).append(')'); - return sb.toString(); + return (shift < valSize); } // members + private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - int shift, valSize = 0; // valSize==0 means not initialized + private int valSize = 0; // valSize==0 means not initialized private final int precisionStep; - - long value = 0L; } diff --git a/lucene/src/java/org/apache/lucene/analysis/Token.java b/lucene/src/java/org/apache/lucene/analysis/Token.java index a50b934377c..80c31ec4189 100644 --- a/lucene/src/java/org/apache/lucene/analysis/Token.java +++ b/lucene/src/java/org/apache/lucene/analysis/Token.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; /** A Token is an occurrence of a term from the text of a field. It consists of @@ -588,6 +589,17 @@ public class Token extends CharTermAttributeImpl } } + @Override + public void reflectWith(AttributeReflector reflector) { + super.reflectWith(reflector); + reflector.reflect(OffsetAttribute.class, "startOffset", startOffset); + reflector.reflect(OffsetAttribute.class, "endOffset", endOffset); + reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement); + reflector.reflect(PayloadAttribute.class, "payload", payload); + reflector.reflect(FlagsAttribute.class, "flags", flags); + reflector.reflect(TypeAttribute.class, "type", type); + } + /** Convenience factory that returns Token as implementation for the basic * attributes and return the default impl (with "Impl" appended) for all other * attributes. diff --git a/lucene/src/java/org/apache/lucene/analysis/package.html b/lucene/src/java/org/apache/lucene/analysis/package.html index d98f84f5d66..28569e483ba 100644 --- a/lucene/src/java/org/apache/lucene/analysis/package.html +++ b/lucene/src/java/org/apache/lucene/analysis/package.html @@ -305,7 +305,7 @@ with the TokenStream.
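The NumericTokenStream refactoring above moves value, valueSize, precisionStep and shift into the NumericTermAttribute itself, with incrementToken() advancing the shift via incShift(). A minimal consumer sketch (illustrative, not part of this patch; it assumes the NumericTermAttribute interface shown above is reachable as NumericTokenStream.NumericTermAttribute):

  import org.apache.lucene.analysis.NumericTokenStream;

  public class NumericTokenStreamDemo {
    public static void main(String[] args) throws Exception {
      // With the default precisionStep of 4, a 64-bit value yields 16 tokens,
      // one per shift 0, 4, 8, ... 60; only the shift==0 token carries a
      // position increment of 1.
      NumericTokenStream stream = new NumericTokenStream().setLongValue(1234L);
      NumericTokenStream.NumericTermAttribute numAtt =
          stream.addAttribute(NumericTokenStream.NumericTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println("shift=" + numAtt.getShift()
            + " rawValue=" + numAtt.getRawValue());
      }
    }
  }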
  • Attribute instances are reused for all tokens of a document. Thus, a TokenStream/-Filter needs to update the appropriate Attribute(s) in incrementToken(). The consumer, commonly the Lucene indexer, consumes the data in the -Attributes and then calls incrementToken() again until it retuns false, which indicates that the end of the stream +Attributes and then calls incrementToken() again until it returns false, which indicates that the end of the stream was reached. This means that in each call of incrementToken() a TokenStream/-Filter can safely overwrite the data in the Attribute instances.
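The reflectWith(AttributeReflector) implementations introduced in this patch (on the numeric term attribute, Token and CharTermAttributeImpl) let a consumer enumerate an attribute's contents generically, which is handy for debugging token streams. A small sketch, assuming only the reflect(Class, String, Object) callback used above:

  import org.apache.lucene.analysis.Token;
  import org.apache.lucene.util.Attribute;
  import org.apache.lucene.util.AttributeReflector;

  public class TokenDumpDemo {
    public static void main(String[] args) {
      Token token = new Token();
      token.append("example");          // CharTermAttribute API inherited by Token
      token.setPositionIncrement(1);
      token.reflectWith(new AttributeReflector() {
        public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
          System.out.println(attClass.getSimpleName() + "#" + key + " = " + value);
        }
      });
    }
  }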
  • diff --git a/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java b/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java index 4268abc0db6..d45d280f73c 100644 --- a/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java @@ -23,6 +23,7 @@ import java.nio.CharBuffer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; @@ -243,6 +244,14 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr return new String(termBuffer, 0, termLength); } + @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(CharTermAttribute.class, "term", toString()); + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + } + @Override public void copyTo(AttributeImpl target) { CharTermAttribute t = (CharTermAttribute) target; diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 54ea023ba28..0fb6f8795f2 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -81,7 +81,7 @@ public abstract class AbstractField implements Fieldable { * used to compute the norm factor for the field. By * default, in the {@link * org.apache.lucene.search.Similarity#computeNorm(String, - * FieldInvertState)} method, the boost value is multipled + * FieldInvertState)} method, the boost value is multiplied * by the {@link * org.apache.lucene.search.Similarity#lengthNorm(String, * int)} and then @@ -103,7 +103,7 @@ public abstract class AbstractField implements Fieldable { * *

    Note: this value is not stored directly with the document in the index. * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when + * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when * this field was indexed. * * @see #setBoost(float) diff --git a/lucene/src/java/org/apache/lucene/document/DateTools.java b/lucene/src/java/org/apache/lucene/document/DateTools.java index 68cb2dfdf25..0e5199c6247 100644 --- a/lucene/src/java/org/apache/lucene/document/DateTools.java +++ b/lucene/src/java/org/apache/lucene/document/DateTools.java @@ -47,28 +47,37 @@ import org.apache.lucene.util.NumericUtils; // for javadocs */ public class DateTools { - private final static TimeZone GMT = TimeZone.getTimeZone("GMT"); + private static final class DateFormats { + final static TimeZone GMT = TimeZone.getTimeZone("GMT"); - private static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US); - private static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US); - private static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US); - private static final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US); - private static final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US); - private static final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US); - private static final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US); - static { - // times need to be normalized so the value doesn't depend on the - // location the index is created/used: - YEAR_FORMAT.setTimeZone(GMT); - MONTH_FORMAT.setTimeZone(GMT); - DAY_FORMAT.setTimeZone(GMT); - HOUR_FORMAT.setTimeZone(GMT); - MINUTE_FORMAT.setTimeZone(GMT); - SECOND_FORMAT.setTimeZone(GMT); - MILLISECOND_FORMAT.setTimeZone(GMT); + final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US); + final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US); + final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US); + final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US); + final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US); + final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US); + final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US); + { + // times need to be normalized so the value doesn't depend on the + // location the index is created/used: + YEAR_FORMAT.setTimeZone(GMT); + MONTH_FORMAT.setTimeZone(GMT); + DAY_FORMAT.setTimeZone(GMT); + HOUR_FORMAT.setTimeZone(GMT); + MINUTE_FORMAT.setTimeZone(GMT); + SECOND_FORMAT.setTimeZone(GMT); + MILLISECOND_FORMAT.setTimeZone(GMT); + } + + final Calendar calInstance = Calendar.getInstance(GMT, Locale.US); } - - private static final Calendar calInstance = Calendar.getInstance(GMT); + + private static final ThreadLocal FORMATS = new ThreadLocal() { + @Override + protected DateFormats initialValue() { + return new DateFormats(); + } + }; // cannot create, the class has static methods only private DateTools() {} @@ -82,7 +91,7 @@ public class DateTools { * @return a string in format yyyyMMddHHmmssSSS or shorter, * depending on resolution; 
using GMT as timezone */ - public static synchronized String dateToString(Date date, Resolution resolution) { + public static String dateToString(Date date, Resolution resolution) { return timeToString(date.getTime(), resolution); } @@ -95,24 +104,20 @@ public class DateTools { * @return a string in format yyyyMMddHHmmssSSS or shorter, * depending on resolution; using GMT as timezone */ - public static synchronized String timeToString(long time, Resolution resolution) { - calInstance.setTimeInMillis(round(time, resolution)); - Date date = calInstance.getTime(); + public static String timeToString(long time, Resolution resolution) { + final DateFormats formats = FORMATS.get(); - if (resolution == Resolution.YEAR) { - return YEAR_FORMAT.format(date); - } else if (resolution == Resolution.MONTH) { - return MONTH_FORMAT.format(date); - } else if (resolution == Resolution.DAY) { - return DAY_FORMAT.format(date); - } else if (resolution == Resolution.HOUR) { - return HOUR_FORMAT.format(date); - } else if (resolution == Resolution.MINUTE) { - return MINUTE_FORMAT.format(date); - } else if (resolution == Resolution.SECOND) { - return SECOND_FORMAT.format(date); - } else if (resolution == Resolution.MILLISECOND) { - return MILLISECOND_FORMAT.format(date); + formats.calInstance.setTimeInMillis(round(time, resolution)); + final Date date = formats.calInstance.getTime(); + + switch (resolution) { + case YEAR: return formats.YEAR_FORMAT.format(date); + case MONTH:return formats.MONTH_FORMAT.format(date); + case DAY: return formats.DAY_FORMAT.format(date); + case HOUR: return formats.HOUR_FORMAT.format(date); + case MINUTE: return formats.MINUTE_FORMAT.format(date); + case SECOND: return formats.SECOND_FORMAT.format(date); + case MILLISECOND: return formats.MILLISECOND_FORMAT.format(date); } throw new IllegalArgumentException("unknown resolution " + resolution); @@ -128,7 +133,7 @@ public class DateTools { * @throws ParseException if dateString is not in the * expected format */ - public static synchronized long stringToTime(String dateString) throws ParseException { + public static long stringToTime(String dateString) throws ParseException { return stringToDate(dateString).getTime(); } @@ -142,21 +147,23 @@ public class DateTools { * @throws ParseException if dateString is not in the * expected format */ - public static synchronized Date stringToDate(String dateString) throws ParseException { + public static Date stringToDate(String dateString) throws ParseException { + final DateFormats formats = FORMATS.get(); + if (dateString.length() == 4) { - return YEAR_FORMAT.parse(dateString); + return formats.YEAR_FORMAT.parse(dateString); } else if (dateString.length() == 6) { - return MONTH_FORMAT.parse(dateString); + return formats.MONTH_FORMAT.parse(dateString); } else if (dateString.length() == 8) { - return DAY_FORMAT.parse(dateString); + return formats.DAY_FORMAT.parse(dateString); } else if (dateString.length() == 10) { - return HOUR_FORMAT.parse(dateString); + return formats.HOUR_FORMAT.parse(dateString); } else if (dateString.length() == 12) { - return MINUTE_FORMAT.parse(dateString); + return formats.MINUTE_FORMAT.parse(dateString); } else if (dateString.length() == 14) { - return SECOND_FORMAT.parse(dateString); + return formats.SECOND_FORMAT.parse(dateString); } else if (dateString.length() == 17) { - return MILLISECOND_FORMAT.parse(dateString); + return formats.MILLISECOND_FORMAT.parse(dateString); } throw new ParseException("Input is not valid date string: " + dateString, 0); } @@ -170,7 +177,7 
@@ public class DateTools { * @return the date with all values more precise than resolution * set to 0 or 1 */ - public static synchronized Date round(Date date, Resolution resolution) { + public static Date round(Date date, Resolution resolution) { return new Date(round(date.getTime(), resolution)); } @@ -184,67 +191,63 @@ public class DateTools { * @return the date with all values more precise than resolution * set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT */ - public static synchronized long round(long time, Resolution resolution) { + public static long round(long time, Resolution resolution) { + final Calendar calInstance = FORMATS.get().calInstance; calInstance.setTimeInMillis(time); - if (resolution == Resolution.YEAR) { - calInstance.set(Calendar.MONTH, 0); - calInstance.set(Calendar.DAY_OF_MONTH, 1); - calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.MONTH) { - calInstance.set(Calendar.DAY_OF_MONTH, 1); - calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.DAY) { - calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.HOUR) { - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.MINUTE) { - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.SECOND) { - calInstance.set(Calendar.MILLISECOND, 0); - } else if (resolution == Resolution.MILLISECOND) { - // don't cut off anything - } else { - throw new IllegalArgumentException("unknown resolution " + resolution); + switch (resolution) { + case YEAR: + calInstance.set(Calendar.MONTH, 0); + calInstance.set(Calendar.DAY_OF_MONTH, 1); + calInstance.set(Calendar.HOUR_OF_DAY, 0); + calInstance.set(Calendar.MINUTE, 0); + calInstance.set(Calendar.SECOND, 0); + calInstance.set(Calendar.MILLISECOND, 0); + break; + case MONTH: + calInstance.set(Calendar.DAY_OF_MONTH, 1); + calInstance.set(Calendar.HOUR_OF_DAY, 0); + calInstance.set(Calendar.MINUTE, 0); + calInstance.set(Calendar.SECOND, 0); + calInstance.set(Calendar.MILLISECOND, 0); + break; + case DAY: + calInstance.set(Calendar.HOUR_OF_DAY, 0); + calInstance.set(Calendar.MINUTE, 0); + calInstance.set(Calendar.SECOND, 0); + calInstance.set(Calendar.MILLISECOND, 0); + break; + case HOUR: + calInstance.set(Calendar.MINUTE, 0); + calInstance.set(Calendar.SECOND, 0); + calInstance.set(Calendar.MILLISECOND, 0); + break; + case MINUTE: + calInstance.set(Calendar.SECOND, 0); + calInstance.set(Calendar.MILLISECOND, 0); + break; + case SECOND: + calInstance.set(Calendar.MILLISECOND, 0); + break; + case MILLISECOND: + // don't cut off anything + break; + default: + throw new IllegalArgumentException("unknown resolution " + resolution); } return calInstance.getTimeInMillis(); } /** Specifies the time granularity. 
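Because the SimpleDateFormat and Calendar instances now live in the per-thread DateFormats holder above, the public DateTools methods drop their former synchronized modifiers and can be called concurrently without contending on shared formatters. A small round-trip sketch (illustrative only):

  import java.text.ParseException;
  import java.util.Date;
  import org.apache.lucene.document.DateTools;

  public class DateToolsDemo {
    public static void main(String[] args) throws ParseException {
      // Encode the current time at DAY resolution (normalized to GMT), then parse it back.
      String encoded = DateTools.timeToString(System.currentTimeMillis(),
                                              DateTools.Resolution.DAY);
      Date decoded = DateTools.stringToDate(encoded);
      System.out.println(encoded + " -> " + decoded);
    }
  }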
*/ - public static class Resolution { + public static enum Resolution { - public static final Resolution YEAR = new Resolution("year"); - public static final Resolution MONTH = new Resolution("month"); - public static final Resolution DAY = new Resolution("day"); - public static final Resolution HOUR = new Resolution("hour"); - public static final Resolution MINUTE = new Resolution("minute"); - public static final Resolution SECOND = new Resolution("second"); - public static final Resolution MILLISECOND = new Resolution("millisecond"); + YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND; - private String resolution; - - private Resolution() { - } - - private Resolution(String resolution) { - this.resolution = resolution; - } - + /** this method returns the name of the resolution + * in lowercase (for backwards compatibility) */ @Override public String toString() { - return resolution; + return super.toString().toLowerCase(Locale.ENGLISH); } } diff --git a/lucene/src/java/org/apache/lucene/document/Document.java b/lucene/src/java/org/apache/lucene/document/Document.java index 1dea49d465c..58e7a7ee3c3 100644 --- a/lucene/src/java/org/apache/lucene/document/Document.java +++ b/lucene/src/java/org/apache/lucene/document/Document.java @@ -18,6 +18,7 @@ package org.apache.lucene.document; */ import java.util.*; // for javadoc +import org.apache.lucene.search.IndexSearcher; // for javadoc import org.apache.lucene.search.ScoreDoc; // for javadoc import org.apache.lucene.index.IndexReader; // for javadoc @@ -165,7 +166,7 @@ public final class Document implements java.io.Serializable { /** Returns a List of all the fields in a document. *

    Note that fields which are not {@link Fieldable#isStored() stored} are * not available in documents retrieved from the - * index, e.g. {@link Searcher#doc(int)} or {@link + * index, e.g. {@link IndexSearcher#doc(int)} or {@link * IndexReader#document(int)}. */ public final List getFields() { diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java index 35d2d06d611..75529bc2819 100755 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ b/lucene/src/java/org/apache/lucene/document/Fieldable.java @@ -67,7 +67,7 @@ public interface Fieldable extends Serializable { * *

    Note: this value is not stored directly with the document in the index. * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when + * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when * this field was indexed. * * @see #setBoost(float) diff --git a/lucene/src/java/org/apache/lucene/document/NumericField.java b/lucene/src/java/org/apache/lucene/document/NumericField.java index b1ccf52c8bc..4d008e0169f 100644 --- a/lucene/src/java/org/apache/lucene/document/NumericField.java +++ b/lucene/src/java/org/apache/lucene/document/NumericField.java @@ -134,8 +134,6 @@ import org.apache.lucene.search.FieldCache; // javadocs * values are returned as {@link String}s (according to * toString(value) of the used data type). * - * @lucene.experimental - * * @since 2.9 */ public final class NumericField extends AbstractField { diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java index 3b144aada2c..c72a1f6b0a3 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java @@ -17,435 +17,228 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; -import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Iterator; import java.util.HashMap; -import java.util.Date; -import java.util.Map.Entry; +import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.TreeMap; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicInteger; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit; -/** Holds a {@link SegmentDeletes} for each segment in the - * index. */ +/* Holds buffered deletes, by docID, term or query for a + * single segment. This is used to hold buffered pending + * deletes against the to-be-flushed segment. Once the + * deletes are pushed (on flush in DocumentsWriter), these + * deletes are converted to a FrozenDeletes instance. */ + +// NOTE: we are sync'd by BufferedDeletes, ie, all access to +// instances of this class is via sync'd methods on +// BufferedDeletes class BufferedDeletes { - // Deletes for all flushed/merged segments: - private final Map deletesMap = new HashMap(); + /* Rough logic: HashMap has an array[Entry] w/ varying + load factor (say 2 * POINTER). Entry is object w/ Term + key, Integer val, int hash, Entry next + (OBJ_HEADER + 3*POINTER + INT). Term is object w/ + String field and String text (OBJ_HEADER + 2*POINTER). + We don't count Term's field since it's interned. + Term's text is String (OBJ_HEADER + 4*INT + POINTER + + OBJ_HEADER + string.length*CHAR). Integer is + OBJ_HEADER + INT. 
*/ + final static int BYTES_PER_DEL_TERM = 8*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT; - // used only by assert - private Term lastDeleteTerm; - - private PrintStream infoStream; - private final AtomicLong bytesUsed = new AtomicLong(); - private final AtomicInteger numTerms = new AtomicInteger(); - private final int messageID; + /* Rough logic: del docIDs are List. Say list + allocates ~2X size (2*POINTER). Integer is OBJ_HEADER + + int */ + final static int BYTES_PER_DEL_DOCID = 2*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT; - public BufferedDeletes(int messageID) { - this.messageID = messageID; - } + /* Rough logic: HashMap has an array[Entry] w/ varying + load factor (say 2 * POINTER). Entry is object w/ + Query key, Integer val, int hash, Entry next + (OBJ_HEADER + 3*POINTER + INT). Query we often + undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */ + final static int BYTES_PER_DEL_QUERY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2*RamUsageEstimator.NUM_BYTES_INT + 24; - private synchronized void message(String message) { - if (infoStream != null) { - infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: BD " + message); - } - } - - public synchronized void setInfoStream(PrintStream infoStream) { - this.infoStream = infoStream; - } + final AtomicInteger numTermDeletes = new AtomicInteger(); + final Map terms; + final Map queries = new HashMap(); + final List docIDs = new ArrayList(); - public synchronized void pushDeletes(SegmentDeletes newDeletes, SegmentInfo info) { - pushDeletes(newDeletes, info, false); - } + public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); - // Moves all pending deletes onto the provided segment, - // then clears the pending deletes - public synchronized void pushDeletes(SegmentDeletes newDeletes, SegmentInfo info, boolean noLimit) { - assert newDeletes.any(); - numTerms.addAndGet(newDeletes.numTermDeletes.get()); + final AtomicLong bytesUsed = new AtomicLong(); - if (!noLimit) { - assert !deletesMap.containsKey(info); - assert info != null; - deletesMap.put(info, newDeletes); - bytesUsed.addAndGet(newDeletes.bytesUsed.get()); + private final static boolean VERBOSE_DELETES = false; + + long gen; + + public BufferedDeletes(boolean sortTerms) { + if (sortTerms) { + terms = new TreeMap(); } else { - final SegmentDeletes deletes = getDeletes(info); - bytesUsed.addAndGet(-deletes.bytesUsed.get()); - deletes.update(newDeletes, noLimit); - bytesUsed.addAndGet(deletes.bytesUsed.get()); - } - if (infoStream != null) { - message("push deletes seg=" + info + " dels=" + getDeletes(info)); + terms = new HashMap(); } - assert checkDeleteStats(); } - public synchronized void clear() { - deletesMap.clear(); - numTerms.set(0); + @Override + public String toString() { + if (VERBOSE_DELETES) { + return "gen=" + gen + " numTerms=" + numTermDeletes + ", terms=" + terms + + ", queries=" + queries + ", docIDs=" + docIDs + ", bytesUsed=" + + bytesUsed; + } else { + String s = "gen=" + gen; + if (numTermDeletes.get() != 0) { + s += " " + numTermDeletes.get() + " deleted terms (unique count=" + terms.size() + ")"; + } + if (queries.size() != 0) { + s += " " + queries.size() + " deleted queries"; + } + if (docIDs.size() != 0) { + s += " " + docIDs.size() + " deleted docIDs"; + } + if (bytesUsed.get() != 0) { + 
s += " bytesUsed=" + bytesUsed.get(); + } + + return s; + } + } + + void update(BufferedDeletes in) { + numTermDeletes.addAndGet(in.numTermDeletes.get()); + for (Map.Entry ent : in.terms.entrySet()) { + final Term term = ent.getKey(); + if (!terms.containsKey(term)) { + // only incr bytesUsed if this term wasn't already buffered: + bytesUsed.addAndGet(BYTES_PER_DEL_TERM); + } + terms.put(term, MAX_INT); + } + + for (Map.Entry ent : in.queries.entrySet()) { + final Query query = ent.getKey(); + if (!queries.containsKey(query)) { + // only incr bytesUsed if this query wasn't already buffered: + bytesUsed.addAndGet(BYTES_PER_DEL_QUERY); + } + queries.put(query, MAX_INT); + } + + // docIDs never move across segments and the docIDs + // should already be cleared + } + + void update(FrozenBufferedDeletes in) { + numTermDeletes.addAndGet(in.numTermDeletes); + for(Term term : in.terms) { + if (!terms.containsKey(term)) { + // only incr bytesUsed if this term wasn't already buffered: + bytesUsed.addAndGet(BYTES_PER_DEL_TERM); + } + terms.put(term, MAX_INT); + } + + for(int queryIdx=0;queryIdx termsIterable() { + return new Iterable() { + // @Override -- not until Java 1.6 + public Iterator iterator() { + return terms.keySet().iterator(); + } + }; + } + + public Iterable queriesIterable() { + return new Iterable() { + + // @Override -- not until Java 1.6 + public Iterator iterator() { + return new Iterator() { + private final Iterator> iter = queries.entrySet().iterator(); + + // @Override -- not until Java 1.6 + public boolean hasNext() { + return iter.hasNext(); + } + + // @Override -- not until Java 1.6 + public QueryAndLimit next() { + final Map.Entry ent = iter.next(); + return new QueryAndLimit(ent.getKey(), ent.getValue()); + } + + // @Override -- not until Java 1.6 + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + void clear() { + terms.clear(); + queries.clear(); + docIDs.clear(); + numTermDeletes.set(0); bytesUsed.set(0); } - - synchronized boolean any() { - return bytesUsed.get() != 0; - } - - public int numTerms() { - return numTerms.get(); - } - - public long bytesUsed() { - return bytesUsed.get(); - } - - // IW calls this on finishing a merge. While the merge - // was running, it's possible new deletes were pushed onto - // our last (and only our last) segment. In this case we - // must carry forward those deletes onto the merged - // segment. 
- synchronized void commitMerge(MergePolicy.OneMerge merge) { - assert checkDeleteStats(); - if (infoStream != null) { - message("commitMerge merge.info=" + merge.info + " merge.segments=" + merge.segments); - } - final SegmentInfo lastInfo = merge.segments.lastElement(); - final SegmentDeletes lastDeletes = deletesMap.get(lastInfo); - if (lastDeletes != null) { - deletesMap.remove(lastInfo); - assert !deletesMap.containsKey(merge.info); - deletesMap.put(merge.info, lastDeletes); - // don't need to update numTerms/bytesUsed since we - // are just moving the deletes from one info to - // another - if (infoStream != null) { - message("commitMerge done: new deletions=" + lastDeletes); - } - } else if (infoStream != null) { - message("commitMerge done: no new deletions"); - } - assert !anyDeletes(merge.segments.range(0, merge.segments.size()-1)); - assert checkDeleteStats(); - } - - synchronized void clear(SegmentDeletes deletes) { - deletes.clear(); + + void clearDocIDs() { + bytesUsed.addAndGet(-docIDs.size()*BYTES_PER_DEL_DOCID); + docIDs.clear(); } - public synchronized boolean applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos segmentInfos, SegmentInfos applyInfos) throws IOException { - if (!any()) { - return false; - } - final long t0 = System.currentTimeMillis(); - - if (infoStream != null) { - message("applyDeletes: applyInfos=" + applyInfos + "; index=" + segmentInfos); - } - - assert checkDeleteStats(); - - assert applyInfos.size() > 0; - - boolean any = false; - - final SegmentInfo lastApplyInfo = applyInfos.lastElement(); - final int lastIdx = segmentInfos.indexOf(lastApplyInfo); - - final SegmentInfo firstInfo = applyInfos.firstElement(); - final int firstIdx = segmentInfos.indexOf(firstInfo); - - // applyInfos must be a slice of segmentInfos - assert lastIdx - firstIdx + 1 == applyInfos.size(); - - // iterate over all segment infos backwards - // coalesceing deletes along the way - // when we're at or below the last of the - // segments to apply to, start applying the deletes - // we traverse up to the first apply infos - SegmentDeletes coalescedDeletes = null; - boolean hasDeletes = false; - for (int segIdx=segmentInfos.size()-1; segIdx >= firstIdx; segIdx--) { - final SegmentInfo info = segmentInfos.info(segIdx); - final SegmentDeletes deletes = deletesMap.get(info); - assert deletes == null || deletes.any(); - - if (deletes == null && coalescedDeletes == null) { - continue; - } - - if (infoStream != null) { - message("applyDeletes: seg=" + info + " segment's deletes=[" + (deletes == null ? "null" : deletes) + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "]"); - } - - hasDeletes |= deletes != null; - - if (segIdx <= lastIdx && hasDeletes) { - - final long delCountInc = applyDeletes(readerPool, info, coalescedDeletes, deletes); - - if (delCountInc != 0) { - any = true; - } - if (infoStream != null) { - message("deletes touched " + delCountInc + " docIDs"); - } - - if (deletes != null) { - // we've applied doc ids, and they're only applied - // on the current segment - bytesUsed.addAndGet(-deletes.docIDs.size() * SegmentDeletes.BYTES_PER_DEL_DOCID); - deletes.clearDocIDs(); - } - } - - // now coalesce at the max limit - if (deletes != null) { - if (coalescedDeletes == null) { - coalescedDeletes = new SegmentDeletes(); - } - // TODO: we could make this single pass (coalesce as - // we apply the deletes - coalescedDeletes.update(deletes, true); - } - } - - // move all deletes to segment just before our merge. 
- if (firstIdx > 0) { - - SegmentDeletes mergedDeletes = null; - // TODO: we could also make this single pass - for (SegmentInfo info : applyInfos) { - final SegmentDeletes deletes = deletesMap.get(info); - if (deletes != null) { - assert deletes.any(); - if (mergedDeletes == null) { - mergedDeletes = getDeletes(segmentInfos.info(firstIdx-1)); - numTerms.addAndGet(-mergedDeletes.numTermDeletes.get()); - bytesUsed.addAndGet(-mergedDeletes.bytesUsed.get()); - } - - mergedDeletes.update(deletes, true); - } - } - - if (mergedDeletes != null) { - numTerms.addAndGet(mergedDeletes.numTermDeletes.get()); - bytesUsed.addAndGet(mergedDeletes.bytesUsed.get()); - } - - if (infoStream != null) { - if (mergedDeletes != null) { - message("applyDeletes: merge all deletes into seg=" + segmentInfos.info(firstIdx-1) + ": " + mergedDeletes); - } else { - message("applyDeletes: no deletes to merge"); - } - } - } else { - // We drop the deletes in this case, because we've - // applied them to segment infos starting w/ the first - // segment. There are no prior segments so there's no - // reason to keep them around. When the applyInfos == - // segmentInfos this means all deletes have been - // removed: - } - remove(applyInfos); - - assert checkDeleteStats(); - assert applyInfos != segmentInfos || !any(); - - if (infoStream != null) { - message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec"); - } - return any; - } - - private synchronized long applyDeletes(IndexWriter.ReaderPool readerPool, - SegmentInfo info, - SegmentDeletes coalescedDeletes, - SegmentDeletes segmentDeletes) throws IOException { - assert readerPool.infoIsLive(info); - - assert coalescedDeletes == null || coalescedDeletes.docIDs.size() == 0; - - long delCount = 0; - - // Lock order: IW -> BD -> RP - SegmentReader reader = readerPool.get(info, false); - try { - if (coalescedDeletes != null) { - delCount += applyDeletes(coalescedDeletes, reader); - } - if (segmentDeletes != null) { - delCount += applyDeletes(segmentDeletes, reader); - } - } finally { - readerPool.release(reader); - } - return delCount; - } - - private synchronized long applyDeletes(SegmentDeletes deletes, SegmentReader reader) throws IOException { - - long delCount = 0; - - assert checkDeleteTerm(null); - - if (deletes.terms.size() > 0) { - Fields fields = reader.fields(); - if (fields == null) { - // This reader has no postings - return 0; - } - - TermsEnum termsEnum = null; - - String currentField = null; - DocsEnum docs = null; - - for (Entry entry: deletes.terms.entrySet()) { - Term term = entry.getKey(); - // Since we visit terms sorted, we gain performance - // by re-using the same TermsEnum and seeking only - // forwards - if (term.field() != currentField) { - assert currentField == null || currentField.compareTo(term.field()) < 0; - currentField = term.field(); - Terms terms = fields.terms(currentField); - if (terms != null) { - termsEnum = terms.iterator(); - } else { - termsEnum = null; - } - } - - if (termsEnum == null) { - continue; - } - assert checkDeleteTerm(term); - - if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) { - DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); - - if (docsEnum != null) { - docs = docsEnum; - final int limit = entry.getValue(); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS || docID >= limit) { - break; - } - reader.deleteDocument(docID); - // TODO: we could/should change - // reader.deleteDocument to return boolean - // true if it did in 
fact delete, because here - // we could be deleting an already-deleted doc - // which makes this an upper bound: - delCount++; - } - } - } - } - } - - // Delete by docID - for (Integer docIdInt : deletes.docIDs) { - int docID = docIdInt.intValue(); - reader.deleteDocument(docID); - delCount++; - } - - // Delete by query - if (deletes.queries.size() > 0) { - IndexSearcher searcher = new IndexSearcher(reader); - try { - for (Entry entry : deletes.queries.entrySet()) { - Query query = entry.getKey(); - int limit = entry.getValue().intValue(); - Weight weight = query.weight(searcher); - Scorer scorer = weight.scorer(reader, true, false); - if (scorer != null) { - while(true) { - int doc = scorer.nextDoc(); - if (doc >= limit) - break; - - reader.deleteDocument(doc); - // TODO: we could/should change - // reader.deleteDocument to return boolean - // true if it did in fact delete, because here - // we could be deleting an already-deleted doc - // which makes this an upper bound: - delCount++; - } - } - } - } finally { - searcher.close(); - } - } - - return delCount; - } - - public synchronized SegmentDeletes getDeletes(SegmentInfo info) { - SegmentDeletes deletes = deletesMap.get(info); - if (deletes == null) { - deletes = new SegmentDeletes(); - deletesMap.put(info, deletes); - } - return deletes; - } - - public synchronized void remove(SegmentInfos infos) { - assert infos.size() > 0; - for (SegmentInfo info : infos) { - SegmentDeletes deletes = deletesMap.get(info); - if (deletes != null) { - bytesUsed.addAndGet(-deletes.bytesUsed.get()); - assert bytesUsed.get() >= 0: "bytesUsed=" + bytesUsed; - numTerms.addAndGet(-deletes.numTermDeletes.get()); - assert numTerms.get() >= 0: "numTerms=" + numTerms; - deletesMap.remove(info); - } - } - } - - // used only by assert - private boolean anyDeletes(SegmentInfos infos) { - for(SegmentInfo info : infos) { - if (deletesMap.containsKey(info)) { - return true; - } - } - return false; - } - - // used only by assert - private boolean checkDeleteTerm(Term term) { - if (term != null) { - assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term; - } - lastDeleteTerm = term; - return true; - } - - // only for assert - private boolean checkDeleteStats() { - int numTerms2 = 0; - long bytesUsed2 = 0; - for(SegmentDeletes deletes : deletesMap.values()) { - numTerms2 += deletes.numTermDeletes.get(); - bytesUsed2 += deletes.bytesUsed.get(); - } - assert numTerms2 == numTerms.get(): "numTerms2=" + numTerms2 + " vs " + numTerms.get(); - assert bytesUsed2 == bytesUsed.get(): "bytesUsed2=" + bytesUsed2 + " vs " + bytesUsed; - return true; + boolean any() { + return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0; } } diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java new file mode 100644 index 00000000000..de3046db5dd --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -0,0 +1,441 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.PrintStream; +import java.util.List; +import java.util.ArrayList; +import java.util.Date; +import java.util.Comparator; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; + +/* Tracks the stream of {@link BufferedDeletes}. + * When DocumentsWriter flushes, its buffered + * deletes are appended to this stream. We later + * apply these deletes (resolve them to the actual + * docIDs, per segment) when a merge is started + * (only to the to-be-merged segments). We + * also apply to all segments when NRT reader is pulled, + * commit/close is called, or when too many deletes are + * buffered and must be flushed (by RAM usage or by count). + * + * Each packet is assigned a generation, and each flushed or + * merged segment is also assigned a generation, so we can + * track which BufferedDeletes packets to apply to any given + * segment. */ + +class BufferedDeletesStream { + + // TODO: maybe linked list? + private final List deletes = new ArrayList(); + + // Starts at 1 so that SegmentInfos that have never had + // deletes applied (whose bufferedDelGen defaults to 0) + // will be correct: + private long nextGen = 1; + + // used only by assert + private Term lastDeleteTerm; + + private PrintStream infoStream; + private final AtomicLong bytesUsed = new AtomicLong(); + private final AtomicInteger numTerms = new AtomicInteger(); + private final int messageID; + + public BufferedDeletesStream(int messageID) { + this.messageID = messageID; + } + + private synchronized void message(String message) { + if (infoStream != null) { + infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); + } + } + + public synchronized void setInfoStream(PrintStream infoStream) { + this.infoStream = infoStream; + } + + // Appends a new packet of buffered deletes to the stream, + // setting its generation: + public synchronized void push(FrozenBufferedDeletes packet) { + assert packet.any(); + assert checkDeleteStats(); + assert packet.gen < nextGen; + deletes.add(packet); + numTerms.addAndGet(packet.numTermDeletes); + bytesUsed.addAndGet(packet.bytesUsed); + if (infoStream != null) { + message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size()); + } + assert checkDeleteStats(); + } + + public synchronized void clear() { + deletes.clear(); + nextGen = 1; + numTerms.set(0); + bytesUsed.set(0); + } + + public boolean any() { + return bytesUsed.get() != 0; + } + + public int numTerms() { + return numTerms.get(); + } + + public long bytesUsed() { + return bytesUsed.get(); + } + + public static class ApplyDeletesResult { + // True if any actual deletes took place: + public final boolean anyDeletes; + + // Current gen, for the merged segment: + public final long
gen; + + ApplyDeletesResult(boolean anyDeletes, long gen) { + this.anyDeletes = anyDeletes; + this.gen = gen; + } + } + + // Sorts SegmentInfos from smallest to biggest bufferedDelGen: + private static final Comparator sortByDelGen = new Comparator() { + // @Override -- not until Java 1.6 + public int compare(SegmentInfo si1, SegmentInfo si2) { + final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen(); + if (cmp > 0) { + return 1; + } else if (cmp < 0) { + return -1; + } else { + return 0; + } + } + + @Override + public boolean equals(Object other) { + return sortByDelGen == other; + } + }; + + /** Resolves the buffered deleted Term/Query/docIDs, into + * actual deleted docIDs in the deletedDocs BitVector for + * each SegmentReader. */ + public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException { + final long t0 = System.currentTimeMillis(); + + if (infos.size() == 0) { + return new ApplyDeletesResult(false, nextGen++); + } + + assert checkDeleteStats(); + + if (!any()) { + message("applyDeletes: no deletes; skipping"); + return new ApplyDeletesResult(false, nextGen++); + } + + if (infoStream != null) { + message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size()); + } + + SegmentInfos infos2 = new SegmentInfos(); + infos2.addAll(infos); + Collections.sort(infos2, sortByDelGen); + + BufferedDeletes coalescedDeletes = null; + boolean anyNewDeletes = false; + + int infosIDX = infos2.size()-1; + int delIDX = deletes.size()-1; + + while (infosIDX >= 0) { + //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); + + final FrozenBufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null; + final SegmentInfo info = infos2.get(infosIDX); + final long segGen = info.getBufferedDeletesGen(); + + if (packet != null && segGen < packet.gen) { + //System.out.println(" coalesce"); + if (coalescedDeletes == null) { + coalescedDeletes = new BufferedDeletes(true); + } + coalescedDeletes.update(packet); + delIDX--; + } else if (packet != null && segGen == packet.gen) { + //System.out.println(" eq"); + + // Lock order: IW -> BD -> RP + assert readerPool.infoIsLive(info); + SegmentReader reader = readerPool.get(info, false); + int delCount = 0; + try { + if (coalescedDeletes != null) { + //System.out.println(" del coalesced"); + delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader); + delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); + } + //System.out.println(" del exact"); + // Don't delete by Term here; DocumentsWriter + // already did that on flush: + delCount += applyQueryDeletes(packet.queriesIterable(), reader); + } finally { + readerPool.release(reader); + } + anyNewDeletes |= delCount > 0; + + if (infoStream != null) { + message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? 
"null" : coalescedDeletes) + "] delCount=" + delCount); + } + + if (coalescedDeletes == null) { + coalescedDeletes = new BufferedDeletes(true); + } + coalescedDeletes.update(packet); + delIDX--; + infosIDX--; + info.setBufferedDeletesGen(nextGen); + + } else { + //System.out.println(" gt"); + + if (coalescedDeletes != null) { + // Lock order: IW -> BD -> RP + assert readerPool.infoIsLive(info); + SegmentReader reader = readerPool.get(info, false); + int delCount = 0; + try { + delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader); + delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); + } finally { + readerPool.release(reader); + } + anyNewDeletes |= delCount > 0; + + if (infoStream != null) { + message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount); + } + } + info.setBufferedDeletesGen(nextGen); + + infosIDX--; + } + } + + assert checkDeleteStats(); + if (infoStream != null) { + message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec"); + } + // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; + + return new ApplyDeletesResult(anyNewDeletes, nextGen++); + } + + public synchronized long getNextGen() { + return nextGen++; + } + + // Lock order IW -> BD + /* Removes any BufferedDeletes that we no longer need to + * store because all segments in the index have had the + * deletes applied. */ + public synchronized void prune(SegmentInfos segmentInfos) { + assert checkDeleteStats(); + long minGen = Long.MAX_VALUE; + for(SegmentInfo info : segmentInfos) { + minGen = Math.min(info.getBufferedDeletesGen(), minGen); + } + + if (infoStream != null) { + message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size()); + } + + final int limit = deletes.size(); + for(int delIDX=0;delIDX= minGen) { + prune(delIDX); + assert checkDeleteStats(); + return; + } + } + + // All deletes pruned + prune(limit); + assert !any(); + assert checkDeleteStats(); + } + + private synchronized void prune(int count) { + if (count > 0) { + if (infoStream != null) { + message("pruneDeletes: prune " + count + " packets; " + (deletes.size() - count) + " packets remain"); + } + for(int delIDX=0;delIDX= 0; + bytesUsed.addAndGet(-packet.bytesUsed); + assert bytesUsed.get() >= 0; + } + deletes.subList(0, count).clear(); + } + } + + // Delete by Term + private synchronized long applyTermDeletes(Iterable termsIter, SegmentReader reader) throws IOException { + long delCount = 0; + Fields fields = reader.fields(); + if (fields == null) { + // This reader has no postings + return 0; + } + + TermsEnum termsEnum = null; + + String currentField = null; + DocsEnum docs = null; + + assert checkDeleteTerm(null); + + for (Term term : termsIter) { + // Since we visit terms sorted, we gain performance + // by re-using the same TermsEnum and seeking only + // forwards + if (term.field() != currentField) { + assert currentField == null || currentField.compareTo(term.field()) < 0; + currentField = term.field(); + Terms terms = fields.terms(currentField); + if (terms != null) { + termsEnum = terms.iterator(); + } else { + termsEnum = null; + } + } + + if (termsEnum == null) { + continue; + } + assert checkDeleteTerm(term); + + // System.out.println(" term=" + term); + + if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) { + DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); 
+ + if (docsEnum != null) { + while (true) { + final int docID = docsEnum.nextDoc(); + if (docID == DocsEnum.NO_MORE_DOCS) { + break; + } + reader.deleteDocument(docID); + // TODO: we could/should change + // reader.deleteDocument to return boolean + // true if it did in fact delete, because here + // we could be deleting an already-deleted doc + // which makes this an upper bound: + delCount++; + } + } + } + } + + return delCount; + } + + public static class QueryAndLimit { + public final Query query; + public final int limit; + public QueryAndLimit(Query query, int limit) { + this.query = query; + this.limit = limit; + } + } + + // Delete by query + private synchronized long applyQueryDeletes(Iterable queriesIter, SegmentReader reader) throws IOException { + long delCount = 0; + IndexSearcher searcher = new IndexSearcher(reader); + assert searcher.getTopReaderContext().isAtomic; + final AtomicReaderContext readerContext = (AtomicReaderContext) searcher.getTopReaderContext(); + try { + for (QueryAndLimit ent : queriesIter) { + Query query = ent.query; + int limit = ent.limit; + Weight weight = query.weight(searcher); + Scorer scorer = weight.scorer(readerContext, Weight.ScorerContext.def()); + if (scorer != null) { + while(true) { + int doc = scorer.nextDoc(); + if (doc >= limit) + break; + + reader.deleteDocument(doc); + // TODO: we could/should change + // reader.deleteDocument to return boolean + // true if it did in fact delete, because here + // we could be deleting an already-deleted doc + // which makes this an upper bound: + delCount++; + } + } + } + } finally { + searcher.close(); + } + + return delCount; + } + + // used only by assert + private boolean checkDeleteTerm(Term term) { + if (term != null) { + assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term; + } + lastDeleteTerm = term; + return true; + } + + // only for assert + private boolean checkDeleteStats() { + int numTerms2 = 0; + long bytesUsed2 = 0; + for(FrozenBufferedDeletes packet : deletes) { + numTerms2 += packet.numTermDeletes; + bytesUsed2 += packet.bytesUsed; + } + assert numTerms2 == numTerms.get(): "numTerms2=" + numTerms2 + " vs " + numTerms.get(); + assert bytesUsed2 == bytesUsed.get(): "bytesUsed2=" + bytesUsed2 + " vs " + bytesUsed; + return true; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 392ab635249..a109afdb3de 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -548,10 +548,10 @@ public class CheckIndex { if (infoStream != null) { infoStream.print(" test: field norms........."); } - final byte[] b = new byte[reader.maxDoc()]; + byte[] b; for (final String fieldName : fieldNames) { if (reader.hasNorms(fieldName)) { - reader.norms(fieldName, b, 0); + b = reader.norms(fieldName); ++status.totFields; } } @@ -610,6 +610,8 @@ public class CheckIndex { Comparator termComp = terms.getComparator(); + long sumTotalTermFreq = 0; + while(true) { final BytesRef term = terms.next(); @@ -660,6 +662,8 @@ public class CheckIndex { } int lastDoc = -1; + int docCount = 0; + long totalTermFreq = 0; while(true) { final int doc = docs2.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { @@ -667,6 +671,8 @@ public class CheckIndex { } final int freq = docs2.freq(); status.totPos += freq; + totalTermFreq += freq; + docCount++; if (doc <= lastDoc) { throw new RuntimeException("term " 
+ term + ": doc " + doc + " <= lastDoc " + lastDoc); @@ -697,22 +703,39 @@ public class CheckIndex { } } } + + final long totalTermFreq2 = terms.totalTermFreq(); + final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1; - // Now count how many deleted docs occurred in - // this term: - + // Re-count if there are deleted docs: if (reader.hasDeletions()) { final DocsEnum docsNoDel = terms.docs(null, docs); - int count = 0; + docCount = 0; + totalTermFreq = 0; while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - count++; + docCount++; + totalTermFreq += docsNoDel.freq(); } - if (count != docFreq) { - throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + count); + } + + if (docCount != docFreq) { + throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount); + } + if (hasTotalTermFreq) { + sumTotalTermFreq += totalTermFreq; + if (totalTermFreq != totalTermFreq2) { + throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq); } } } + if (sumTotalTermFreq != 0) { + final long v = fields.terms(field).getSumTotalTermFreq(); + if (v != -1 && sumTotalTermFreq != v) { + throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq); + } + } + // Test seek to last term: if (lastTerm != null) { if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) { @@ -779,7 +802,7 @@ public class CheckIndex { msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); } catch (Throwable e) { - msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); + msg("ERROR: " + e); status.error = e; if (infoStream != null) { e.printStackTrace(infoStream); diff --git a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java index a11dab49d03..c80a8343b16 100644 --- a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java +++ b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java @@ -17,15 +17,15 @@ package org.apache.lucene.index; * limitations under the License. */ -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IOUtils; - -import java.util.LinkedList; -import java.util.HashSet; - import java.io.IOException; +import java.util.HashSet; +import java.util.LinkedList; + +import org.apache.lucene.index.codecs.MergeState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; /** * Combines multiple files into a single compound file. @@ -80,7 +80,7 @@ final class CompoundFileWriter { private HashSet ids; private LinkedList entries; private boolean merged = false; - private SegmentMerger.CheckAbort checkAbort; + private MergeState.CheckAbort checkAbort; /** Create the compound stream in the specified file. The file name is the * entire name (no extensions are added). 
@@ -90,7 +90,7 @@ final class CompoundFileWriter { this(dir, name, null); } - CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) { + CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) { if (dir == null) throw new NullPointerException("directory cannot be null"); if (name == null) diff --git a/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java b/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java index b33aa6c1098..b9cafc7c5c2 100644 --- a/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java +++ b/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java @@ -142,8 +142,12 @@ public class ConcurrentMergeScheduler extends MergeScheduler { } }; - /** Called whenever the running merges have changed, to - * pause & unpause threads. */ + /** + * Called whenever the running merges have changed, to pause & unpause + * threads. This method sorts the merge threads by their merge size in + * descending order and then pauses/unpauses threads from first to last -- + * that way, smaller merges are guaranteed to run before larger ones. + */ protected synchronized void updateMergeThreads() { // Only look at threads that are alive & not in the @@ -164,6 +168,7 @@ public class ConcurrentMergeScheduler extends MergeScheduler { threadIdx++; } + // Sort the merge threads in descending order. CollectionUtil.mergeSort(activeMerges, compareByMergeDocCount); int pri = mergeThreadPriority; @@ -175,12 +180,8 @@ public class ConcurrentMergeScheduler extends MergeScheduler { continue; } - final boolean doPause; - if (threadIdx < activeMergeCount-maxThreadCount) { - doPause = true; - } else { - doPause = false; - } + // pause the thread if maxThreadCount is smaller than the number of merge threads. + final boolean doPause = threadIdx < activeMergeCount - maxThreadCount; if (verbose()) { if (doPause != merge.getPause()) { @@ -205,13 +206,26 @@ public class ConcurrentMergeScheduler extends MergeScheduler { } } - private boolean verbose() { + /** + * Returns true if verbosing is enabled. This method is usually used in + * conjunction with {@link #message(String)}, like that: + * + *

    +   * if (verbose()) {
    +   *   message("your message");
    +   * }
    +   * 
    + */ + protected boolean verbose() { return writer != null && writer.verbose(); } - private void message(String message) { - if (verbose()) - writer.message("CMS: " + message); + /** + * Outputs the given message - this method assumes {@link #verbose()} was + * called and returned true. + */ + protected void message(String message) { + writer.message("CMS: " + message); } private synchronized void initMergeThreadPriority() { @@ -231,10 +245,10 @@ public class ConcurrentMergeScheduler extends MergeScheduler { /** Wait for any running merge threads to finish */ public void sync() { - while(true) { + while (true) { MergeThread toSync = null; - synchronized(this) { - for(MergeThread t : mergeThreads) { + synchronized (this) { + for (MergeThread t : mergeThreads) { if (t.isAlive()) { toSync = t; break; @@ -253,12 +267,14 @@ public class ConcurrentMergeScheduler extends MergeScheduler { } } - private synchronized int mergeThreadCount() { + /** + * Returns the number of merge threads that are alive. Note that this number + * is ≤ {@link #mergeThreads} size. + */ + protected synchronized int mergeThreadCount() { int count = 0; - final int numThreads = mergeThreads.size(); - for(int i=0;i= 1+maxMergeCount) { + startStallTime = System.currentTimeMillis(); + if (verbose()) { + message(" too many merges; stalling..."); + } + try { + wait(); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } + } + + if (verbose()) { + if (startStallTime != 0) { + message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec"); + } + } + } + // TODO: we could be careful about which merges to do in // the BG (eg maybe the "biggest" ones) vs FG, which // merges to do first (the easiest ones?), etc. - MergePolicy.OneMerge merge = writer.getNextMerge(); if (merge == null) { if (verbose()) @@ -311,32 +347,11 @@ public class ConcurrentMergeScheduler extends MergeScheduler { boolean success = false; try { synchronized(this) { - final MergeThread merger; - long startStallTime = 0; - while (mergeThreadCount() >= maxMergeCount) { - startStallTime = System.currentTimeMillis(); - if (verbose()) { - message(" too many merges; stalling..."); - } - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - if (verbose()) { - if (startStallTime != 0) { - message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec"); - } - message(" consider merge " + merge.segString(dir)); - } - - assert mergeThreadCount() < maxMergeCount; + message(" consider merge " + merge.segString(dir)); // OK to spawn a new merge thread to handle this // merge: - merger = getMergeThread(writer, merge); + final MergeThread merger = getMergeThread(writer, merge); mergeThreads.add(merger); if (verbose()) { message(" launch new thread [" + merger.getName() + "]"); @@ -360,8 +375,7 @@ public class ConcurrentMergeScheduler extends MergeScheduler { } /** Does the actual merge, by calling {@link IndexWriter#merge} */ - protected void doMerge(MergePolicy.OneMerge merge) - throws IOException { + protected void doMerge(MergePolicy.OneMerge merge) throws IOException { writer.merge(merge); } diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index d9571dddafb..600ed1e7508 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -27,6 +27,7 @@ import java.util.HashSet; import 
java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; @@ -35,10 +36,8 @@ import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; - -import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close +import org.apache.lucene.util.MapBackedSet; /** * An IndexReader which reads indexes with multiple segments. @@ -60,8 +59,8 @@ class DirectoryReader extends IndexReader implements Cloneable { private boolean rollbackHasChanges; private SegmentReader[] subReaders; + private ReaderContext topLevelReaderContext; private int[] starts; // 1st docno for each segment - private final Map subReaderToSlice = new HashMap(); private int maxDoc = 0; private int numDocs = -1; private boolean hasDeletions = false; @@ -71,6 +70,8 @@ class DirectoryReader extends IndexReader implements Cloneable { // opened on a past IndexCommit: private long maxIndexVersion; + private final boolean applyAllDeletes; + // static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, // final int termInfosIndexDivisor) throws CorruptIndexException, IOException { // return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null); @@ -107,6 +108,8 @@ class DirectoryReader extends IndexReader implements Cloneable { } else { this.codecs = codecs; } + readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + applyAllDeletes = false; // To reduce the chance of hitting FileNotFound // (and having to retry), we open segments in @@ -118,6 +121,7 @@ class DirectoryReader extends IndexReader implements Cloneable { boolean success = false; try { readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor); + readers[i].readerFinishedListeners = readerFinishedListeners; success = true; } finally { if (!success) { @@ -137,9 +141,11 @@ class DirectoryReader extends IndexReader implements Cloneable { } // Used by near real-time search - DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { + DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException { this.directory = writer.getDirectory(); this.readOnly = true; + this.applyAllDeletes = applyAllDeletes; // saved for reopen + segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { @@ -147,6 +153,7 @@ class DirectoryReader extends IndexReader implements Cloneable { } else { this.codecs = codecs; } + readerFinishedListeners = writer.getReaderFinishedListeners(); // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's @@ -161,6 +168,7 @@ class DirectoryReader extends IndexReader implements Cloneable { final SegmentInfo info = infos.info(i); assert info.dir == dir; readers[i] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor); + readers[i].readerFinishedListeners = readerFinishedListeners; success = true; } finally { 
if (!success) { @@ -183,11 +191,15 @@ class DirectoryReader extends IndexReader implements Cloneable { /** This constructor is only used for {@link #reopen()} */ DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, - boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { + boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs, + Collection readerFinishedListeners) throws IOException { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; + this.readerFinishedListeners = readerFinishedListeners; + applyAllDeletes = false; + if (codecs == null) { this.codecs = CodecProvider.getDefault(); } else { @@ -233,8 +245,10 @@ class DirectoryReader extends IndexReader implements Cloneable { // this is a new reader; in case we hit an exception we can close it safely newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor); + newReader.readerFinishedListeners = readerFinishedListeners; } else { newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly); + assert newReader.readerFinishedListeners == readerFinishedListeners; } if (newReader == newReaders[i]) { // this reader will be shared between the old and the new one, @@ -300,25 +314,22 @@ class DirectoryReader extends IndexReader implements Cloneable { private void initialize(SegmentReader[] subReaders) throws IOException { this.subReaders = subReaders; starts = new int[subReaders.length + 1]; // build starts array - + final AtomicReaderContext[] subReaderCtx = new AtomicReaderContext[subReaders.length]; + topLevelReaderContext = new CompositeReaderContext(this, subReaderCtx, subReaderCtx); final List subFields = new ArrayList(); - final List fieldSlices = new ArrayList(); - + for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; + subReaderCtx[i] = new AtomicReaderContext(topLevelReaderContext, subReaders[i], i, maxDoc, i, maxDoc); maxDoc += subReaders[i].maxDoc(); // compute maxDocs if (subReaders[i].hasDeletions()) { hasDeletions = true; } - - final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i); - subReaderToSlice.put(subReaders[i], slice); - + final Fields f = subReaders[i].fields(); if (f != null) { subFields.add(f); - fieldSlices.add(slice); } } starts[subReaders.length] = maxDoc; @@ -361,6 +372,7 @@ class DirectoryReader extends IndexReader implements Cloneable { writeLock = null; hasChanges = false; } + assert newReader.readerFinishedListeners != null; return newReader; } @@ -395,7 +407,9 @@ class DirectoryReader extends IndexReader implements Cloneable { // TODO: right now we *always* make a new reader; in // the future we could have write make some effort to // detect that no changes have occurred - return writer.getReader(); + IndexReader reader = writer.getReader(applyAllDeletes); + reader.readerFinishedListeners = readerFinishedListeners; + return reader; } private IndexReader doReopen(final boolean openReadOnly, IndexCommit commit) throws CorruptIndexException, IOException { @@ -462,7 +476,7 @@ class DirectoryReader extends IndexReader implements Cloneable { private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException { DirectoryReader reader; - reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs); 
+ reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs, readerFinishedListeners); return reader; } @@ -605,12 +619,6 @@ class DirectoryReader extends IndexReader implements Cloneable { throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); } - @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { @@ -715,11 +723,18 @@ class DirectoryReader extends IndexReader implements Cloneable { // case we have to roll back: startCommit(); + final SegmentInfos rollbackSegmentInfos = new SegmentInfos(); + rollbackSegmentInfos.addAll(segmentInfos); + boolean success = false; try { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(); + // Remove segments that contain only 100% deleted + // docs: + segmentInfos.pruneDeletedSegments(); + // Sync all files we just wrote directory.sync(segmentInfos.files(directory, false)); segmentInfos.commit(directory); @@ -739,6 +754,10 @@ class DirectoryReader extends IndexReader implements Cloneable { // partially written .del files, etc, are // removed): deleter.refresh(); + + // Restore all SegmentInfos (in case we pruned some) + segmentInfos.clear(); + segmentInfos.addAll(rollbackSegmentInfos); } } @@ -815,11 +834,6 @@ class DirectoryReader extends IndexReader implements Cloneable { } } - // NOTE: only needed in case someone had asked for - // FieldCache for top-level reader (which is generally - // not a good idea): - FieldCache.DEFAULT.purge(this); - if (writer != null) { // Since we just closed, writer may now be able to // delete unused files: @@ -844,18 +858,18 @@ class DirectoryReader extends IndexReader implements Cloneable { fieldSet.addAll(names); } return fieldSet; - } + } + + @Override + public ReaderContext getTopReaderContext() { + return topLevelReaderContext; + } @Override public IndexReader[] getSequentialSubReaders() { return subReaders; } - @Override - public int getSubReaderDocBase(IndexReader subReader) { - return subReaderToSlice.get(subReader).start; - } - /** Returns the directory this index resides in. 
*/ @Override public Directory directory() { diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java index 95d64c44136..d360fbfb230 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -63,8 +63,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField { fieldState.reset(docState.doc.getBoost()); - final int maxFieldLength = docState.maxFieldLength; - final boolean doInvert = consumer.start(fields, count); for(int i=0;i= maxFieldLength) { - if (docState.infoStream != null) - docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens"); - break; - } hasMoreTokens = stream.incrementToken(); } diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index 999fdb117e9..2462803f94d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -30,14 +30,16 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMFile; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RecyclingByteBlockAllocator; import org.apache.lucene.util.ThreadInterruptedException; -import org.apache.lucene.util.RamUsageEstimator; + import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; @@ -127,22 +129,21 @@ final class DocumentsWriter { private boolean aborting; // True if an abort is pending PrintStream infoStream; - int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH; - Similarity similarity; + SimilarityProvider similarityProvider; // max # simultaneous threads; if there are more than // this, they wait for others to finish first private final int maxThreadStates; + // TODO: cutover to BytesRefHash // Deletes for our still-in-RAM (to be flushed next) segment - private SegmentDeletes pendingDeletes = new SegmentDeletes(); + private BufferedDeletes pendingDeletes = new BufferedDeletes(false); static class DocState { DocumentsWriter docWriter; Analyzer analyzer; - int maxFieldLength; PrintStream infoStream; - Similarity similarity; + SimilarityProvider similarityProvider; int docID; Document doc; String maxTermPrefix; @@ -191,6 +192,7 @@ final class DocumentsWriter { /** * Allocate bytes used from shared pool. 
*/ + @Override protected byte[] newBuffer(int size) { assert size == PER_DOC_BLOCK_SIZE; return perDocAllocator.getByteBlock(); @@ -279,16 +281,16 @@ final class DocumentsWriter { private boolean closed; private final FieldInfos fieldInfos; - private final BufferedDeletes bufferedDeletes; + private final BufferedDeletesStream bufferedDeletesStream; private final IndexWriter.FlushControl flushControl; - DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException { + DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException { this.directory = directory; this.writer = writer; - this.similarity = writer.getConfig().getSimilarity(); + this.similarityProvider = writer.getConfig().getSimilarityProvider(); this.maxThreadStates = maxThreadStates; this.fieldInfos = fieldInfos; - this.bufferedDeletes = bufferedDeletes; + this.bufferedDeletesStream = bufferedDeletesStream; flushControl = writer.flushControl; consumer = indexingChain.getChain(this); @@ -337,6 +339,9 @@ final class DocumentsWriter { return doFlush; } + // TODO: we could check w/ FreqProxTermsWriter: if the + // term doesn't exist, don't bother buffering into the + // per-DWPT map (but still must go into the global map) boolean deleteTerm(Term term, boolean skipWait) { final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait); synchronized(this) { @@ -358,17 +363,10 @@ final class DocumentsWriter { } } - synchronized void setMaxFieldLength(int maxFieldLength) { - this.maxFieldLength = maxFieldLength; + synchronized void setSimilarityProvider(SimilarityProvider similarity) { + this.similarityProvider = similarity; for(int i=0;i BD + final long delGen = bufferedDeletesStream.getNextGen(); if (pendingDeletes.any()) { - if (newSegment != null) { + if (segmentInfos.size() > 0 || newSegment != null) { + final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen); if (infoStream != null) { - message("flush: push buffered deletes to newSegment"); + message("flush: push buffered deletes"); } - bufferedDeletes.pushDeletes(pendingDeletes, newSegment); - } else if (segmentInfos.size() > 0) { + bufferedDeletesStream.push(packet); if (infoStream != null) { - message("flush: push buffered deletes to previously flushed segment " + segmentInfos.lastElement()); + message("flush: delGen=" + packet.gen); + } + if (newSegment != null) { + newSegment.setBufferedDeletesGen(packet.gen); } - bufferedDeletes.pushDeletes(pendingDeletes, segmentInfos.lastElement(), true); } else { if (infoStream != null) { message("flush: drop buffered deletes: no segments"); @@ -534,7 +535,9 @@ final class DocumentsWriter { // there are no segments, the deletions cannot // affect anything. 
} - pendingDeletes = new SegmentDeletes(); + pendingDeletes.clear(); + } else if (newSegment != null) { + newSegment.setBufferedDeletesGen(delGen); } } @@ -546,6 +549,8 @@ final class DocumentsWriter { // Lock order: IW -> DW synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException { + final long startTime = System.currentTimeMillis(); + // We change writer's segmentInfos: assert Thread.holdsLock(writer); @@ -583,6 +588,18 @@ final class DocumentsWriter { final SegmentWriteState flushState = segWriteState(); + // Apply delete-by-docID now (delete-byDocID only + // happens when an exception is hit processing that + // doc, eg if analyzer has some problem w/ the text): + if (pendingDeletes.docIDs.size() > 0) { + flushState.deletedDocs = new BitVector(numDocs); + for(int delDocID : pendingDeletes.docIDs) { + flushState.deletedDocs.set(delDocID); + } + pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); + pendingDeletes.docIDs.clear(); + } + newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false); Collection threads = new HashSet(); @@ -593,10 +610,14 @@ final class DocumentsWriter { double startMBUsed = bytesUsed()/1024./1024.; consumer.flush(threads, flushState); + newSegment.setHasVectors(flushState.hasVectors); if (infoStream != null) { message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors")); + if (flushState.deletedDocs != null) { + message("new segment has " + flushState.deletedDocs.count() + " deleted docs"); + } message("flushedFiles=" + newSegment.files()); message("flushed codecs=" + newSegment.getSegmentCodecs()); } @@ -617,6 +638,30 @@ final class DocumentsWriter { newSegment.setUseCompoundFile(true); } + // Must write deleted docs after the CFS so we don't + // slurp the del file into CFS: + if (flushState.deletedDocs != null) { + final int delCount = flushState.deletedDocs.count(); + assert delCount > 0; + newSegment.setDelCount(delCount); + newSegment.advanceDelGen(); + final String delFileName = newSegment.getDelFileName(); + boolean success2 = false; + try { + flushState.deletedDocs.write(directory, delFileName); + success2 = true; + } finally { + if (!success2) { + try { + directory.deleteFile(delFileName); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } + } + if (infoStream != null) { message("flush: segment=" + newSegment); final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; @@ -643,6 +688,9 @@ final class DocumentsWriter { // Lock order: IW -> DW -> BD pushDeletes(newSegment, segmentInfos); + if (infoStream != null) { + message("flush time " + (System.currentTimeMillis()-startTime) + " msec"); + } return newSegment; } @@ -650,7 +698,7 @@ final class DocumentsWriter { SegmentWriteState segWriteState() { return new SegmentWriteState(infoStream, directory, segment, fieldInfos, numDocs, writer.getConfig().getTermIndexInterval(), - SegmentCodecs.build(fieldInfos, writer.codecs), bytesUsed); + SegmentCodecs.build(fieldInfos, writer.codecs), pendingDeletes, bytesUsed); } synchronized void close() { @@ -909,8 +957,7 @@ final class DocumentsWriter { final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; /* if you increase this, you must fix field cache impl for - * getTerms/getTermsIndex requires <= 32768. Also fix - * DeltaBytesWriter's TERM_EOF if necessary. 
*/ + * getTerms/getTermsIndex requires <= 32768. */ final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; /* Initial chunks size of the shared int[] blocks used to @@ -971,7 +1018,7 @@ final class DocumentsWriter { final boolean doBalance; final long deletesRAMUsed; - deletesRAMUsed = bufferedDeletes.bytesUsed(); + deletesRAMUsed = bufferedDeletesStream.bytesUsed(); synchronized(this) { if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) { diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java index c9ab3828f3c..611098a64bc 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java @@ -35,9 +35,8 @@ final class DocumentsWriterThreadState { public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException { this.docWriter = docWriter; docState = new DocumentsWriter.DocState(); - docState.maxFieldLength = docWriter.maxFieldLength; docState.infoStream = docWriter.infoStream; - docState.similarity = docWriter.similarity; + docState.similarityProvider = docWriter.similarityProvider; docState.docWriter = docWriter; consumer = docWriter.consumer.addThread(this); } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 96ace5f1f1b..bfb74209df4 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -56,7 +56,7 @@ public final class FieldInfo { this.storeOffsetWithTermVector = false; this.storePositionWithTermVector = false; this.storePayloads = false; - this.omitNorms = true; + this.omitNorms = false; this.omitTermFreqAndPositions = false; } } @@ -86,7 +86,7 @@ public final class FieldInfo { this.storePayloads = true; } if (this.omitNorms != omitNorms) { - this.omitNorms = false; // once norms are stored, always store + this.omitNorms = true; // if one require omitNorms at least once, it remains off for life } if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) { this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 47d21177f58..39a3cbd90c6 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -284,14 +284,21 @@ public final class FieldInfos { } public boolean hasVectors() { - boolean hasVectors = false; for (int i = 0; i < size(); i++) { if (fieldInfo(i).storeTermVector) { - hasVectors = true; - break; + return true; } } - return hasVectors; + return false; + } + + public boolean hasNorms() { + for (int i = 0; i < size(); i++) { + if (!fieldInfo(i).omitNorms) { + return true; + } + } + return false; } public void write(Directory d, String name) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java index 9dc9ffcc8b4..8c4e92ad4ea 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java @@ -30,6 +30,7 @@ public final class FieldInvertState { int length; int numOverlap; int offset; + int maxTermFrequency; float boost; AttributeSource 
attributeSource; @@ -53,6 +54,7 @@ public final class FieldInvertState { length = 0; numOverlap = 0; offset = 0; + maxTermFrequency = 0; boost = docBoost; attributeSource = null; } @@ -73,6 +75,10 @@ public final class FieldInvertState { return length; } + public void setLength(int length) { + this.length = length; + } + /** * Get the number of terms with positionIncrement == 0. * @return the numOverlap @@ -81,6 +87,10 @@ public final class FieldInvertState { return numOverlap; } + public void setNumOverlap(int numOverlap) { + this.numOverlap = numOverlap; + } + /** * Get end offset of the last processed term. * @return the offset @@ -99,6 +109,19 @@ public final class FieldInvertState { return boost; } + public void setBoost(float boost) { + this.boost = boost; + } + + /** + * Get the maximum term-frequency encountered for any term in the field. A + * field containing "the quick brown fox jumps over the lazy dog" would have + * a value of 2, because "the" appears twice. + */ + public int getMaxTermFrequency() { + return maxTermFrequency; + } + public AttributeSource getAttributeSource() { return attributeSource; } diff --git a/lucene/src/java/org/apache/lucene/index/Fields.java b/lucene/src/java/org/apache/lucene/index/Fields.java index f3fe6542775..20e7176f4ec 100644 --- a/lucene/src/java/org/apache/lucene/index/Fields.java +++ b/lucene/src/java/org/apache/lucene/index/Fields.java @@ -30,7 +30,7 @@ public abstract class Fields { * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field. This may return + /** Get the {@link Terms} for this field. This will return * null if the field does not exist. */ public abstract Terms terms(String field) throws IOException; diff --git a/lucene/src/java/org/apache/lucene/index/FieldsReader.java b/lucene/src/java/org/apache/lucene/index/FieldsReader.java index 96b58120e50..76c0ed23552 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsReader.java @@ -37,8 +37,10 @@ import java.io.Reader; * Class responsible for access to stored document fields. *

    * It uses <segment>.fdt and <segment>.fdx; files. + * + * @lucene.internal */ -final class FieldsReader implements Cloneable { +public final class FieldsReader implements Cloneable { private final static int FORMAT_SIZE = 4; private final FieldInfos fieldInfos; @@ -74,6 +76,23 @@ final class FieldsReader implements Cloneable { ensureOpen(); return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); } + + /** Verifies that the code version which wrote the segment is supported. */ + public static void checkCodeVersion(Directory dir, String segment) throws IOException { + final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION); + IndexInput idxStream = dir.openInput(indexStreamFN, 1024); + + try { + int format = idxStream.readInt(); + if (format < FieldsWriter.FORMAT_MINIMUM) + throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT); + if (format > FieldsWriter.FORMAT_CURRENT) + throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT); + } finally { + idxStream.close(); + } + + } // Used only by clone private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, @@ -89,11 +108,11 @@ final class FieldsReader implements Cloneable { indexStream = (IndexInput) cloneableIndexStream.clone(); } - FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0); } - FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException { + public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException { boolean success = false; isOriginal = true; try { @@ -157,7 +176,7 @@ final class FieldsReader implements Cloneable { * * @throws IOException */ - final void close() throws IOException { + public final void close() throws IOException { if (!closed) { if (fieldsStream != null) { fieldsStream.close(); @@ -178,7 +197,7 @@ final class FieldsReader implements Cloneable { } } - final int size() { + public final int size() { return size; } @@ -186,7 +205,7 @@ final class FieldsReader implements Cloneable { indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); } - final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { seekIndex(n); long position = indexStream.readLong(); fieldsStream.seek(position); @@ -237,7 +256,7 @@ final class FieldsReader implements Cloneable { * contiguous range of length numDocs starting with * startDocID. 
Returns the IndexInput (the fieldStream), * already seeked to the starting point for startDocID.*/ - final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { + public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { seekIndex(startDocID); long startOffset = indexStream.readLong(); long lastOffset = startOffset; diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 3393a71fdfb..4dc7cfee89e 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -19,17 +19,19 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; -import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.MapBackedSet; import java.io.IOException; import java.util.Collection; import java.util.Map; import java.util.Comparator; +import java.util.concurrent.ConcurrentHashMap; /** A FilterIndexReader contains another IndexReader, which it * uses as its basic source of data, possibly transforming the data along the @@ -105,6 +107,11 @@ public class FilterIndexReader extends IndexReader { public long getUniqueTermCount() throws IOException { return in.getUniqueTermCount(); } + + @Override + public long getSumTotalTermFreq() throws IOException { + return in.getSumTotalTermFreq(); + } } /** Base class for filtering {@link TermsEnum} implementations. */ @@ -141,11 +148,6 @@ public class FilterIndexReader extends IndexReader { return in.seek(text, useCache); } - @Override - public void cacheCurrentTerm() throws IOException { - in.cacheCurrentTerm(); - } - @Override public SeekStatus seek(long ord) throws IOException { return in.seek(ord); @@ -167,10 +169,15 @@ public class FilterIndexReader extends IndexReader { } @Override - public int docFreq() { + public int docFreq() throws IOException { return in.docFreq(); } + @Override + public long totalTermFreq() throws IOException { + return in.totalTermFreq(); + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return in.docs(skipDocs, reuse); @@ -185,6 +192,16 @@ public class FilterIndexReader extends IndexReader { public Comparator getComparator() throws IOException { return in.getComparator(); } + + @Override + public void seek(BytesRef term, TermState state) throws IOException { + in.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return in.termState(); + } } /** Base class for filtering {@link DocsEnum} implementations. 
*/ @@ -282,6 +299,7 @@ public class FilterIndexReader extends IndexReader { public FilterIndexReader(IndexReader in) { super(); this.in = in; + readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); } @Override @@ -361,12 +379,6 @@ public class FilterIndexReader extends IndexReader { return in.norms(f); } - @Override - public void norms(String f, byte[] bytes, int offset) throws IOException { - ensureOpen(); - in.norms(f, bytes, offset); - } - @Override protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException { in.setNorm(d, f, b); @@ -393,11 +405,6 @@ public class FilterIndexReader extends IndexReader { @Override protected void doClose() throws IOException { in.close(); - - // NOTE: only needed in case someone had asked for - // FieldCache for top-level reader (which is generally - // not a good idea): - FieldCache.DEFAULT.purge(this); } @@ -429,6 +436,11 @@ public class FilterIndexReader extends IndexReader { public IndexReader[] getSequentialSubReaders() { return in.getSequentialSubReaders(); } + + @Override + public ReaderContext getTopReaderContext() { + return in.getTopReaderContext(); + } @Override public Fields fields() throws IOException { @@ -451,4 +463,16 @@ public class FilterIndexReader extends IndexReader { buffer.append(')'); return buffer.toString(); } -} \ No newline at end of file + + @Override + public void addReaderFinishedListener(ReaderFinishedListener listener) { + super.addReaderFinishedListener(listener); + in.addReaderFinishedListener(listener); + } + + @Override + public void removeReaderFinishedListener(ReaderFinishedListener listener) { + super.removeReaderFinishedListener(listener); + in.removeReaderFinishedListener(listener); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java index d749d24b555..d342cb47249 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java @@ -20,13 +20,15 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.List; import java.util.Map; -import java.util.Comparator; -import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.PostingsConsumer; +import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.codecs.TermsConsumer; +import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; @@ -107,7 +109,7 @@ final class FreqProxTermsWriter extends TermsHashConsumer { // If this field has postings then add them to the // segment - appendPostings(fields, consumer); + appendPostings(fieldName, state, fields, consumer); for(int i=0;i ent : deletes.queries.entrySet()) { + queries[upto] = ent.getKey(); + queryLimits[upto] = ent.getValue(); + upto++; + } + bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY; + numTermDeletes = deletes.numTermDeletes.get(); + this.gen = gen; + } + + public Iterable termsIterable() { + return new Iterable() { + // @Override -- not until Java 1.6 + public Iterator iterator() { + return new Iterator() { + private int upto; + + // @Override -- not until Java 1.6 + public boolean hasNext() { + return upto < terms.length; + } + + // @Override -- not until Java 1.6 + 
public Term next() { + return terms[upto++]; + } + + // @Override -- not until Java 1.6 + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + public Iterable queriesIterable() { + return new Iterable() { + // @Override -- not until Java 1.6 + public Iterator iterator() { + return new Iterator() { + private int upto; + + // @Override -- not until Java 1.6 + public boolean hasNext() { + return upto < queries.length; + } + + // @Override -- not until Java 1.6 + public QueryAndLimit next() { + QueryAndLimit ret = new QueryAndLimit(queries[upto], queryLimits[upto]); + upto++; + return ret; + } + + // @Override -- not until Java 1.6 + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + @Override + public String toString() { + String s = ""; + if (numTermDeletes != 0) { + s += " " + numTermDeletes + " deleted terms (unique count=" + terms.length + ")"; + } + if (queries.length != 0) { + s += " " + queries.length + " deleted queries"; + } + if (bytesUsed != 0) { + s += " bytesUsed=" + bytesUsed; + } + + return s; + } + + boolean any() { + return terms.length > 0 || queries.length > 0; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index ef9c4b419c6..4f14170bdfc 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -204,7 +204,7 @@ public final class IndexFileNames { /** * Returns true if the given filename ends with the given extension. One - * should provide a pure extension, withouth '.'. + * should provide a pure extension, without '.'. */ public static boolean matchesExtension(String filename, String ext) { // It doesn't make a difference whether we allocate a StringBuilder ourself diff --git a/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java b/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java index 9be38a91e2a..b8f9356cfd4 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java @@ -23,10 +23,15 @@ package org.apache.lucene.index; */ public class IndexFormatTooOldException extends CorruptIndexException { + public IndexFormatTooOldException(String filename, String version) { + super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") + + ": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later."); + } + public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) { super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") + - ": " + version + " (needs to be between " + minVersion + " and " + maxVersion + - "). This version of Lucene only supports indexes created with release 3.0 and later."); + ": " + version + " (needs to be between " + minVersion + " and " + maxVersion + + "). 
This version of Lucene only supports indexes created with release 3.0 and later."); } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java b/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java index 5e7107448b8..dc0a6fa0d1e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java +++ b/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java @@ -21,7 +21,7 @@ import java.io.FileNotFoundException; /** * Signals that no index was found in the Directory. Possibly because the - * directory is empty, however can slso indicate an index corruption. + * directory is empty, however can also indicate an index corruption. */ public final class IndexNotFoundException extends FileNotFoundException { diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 7f2aa6f8945..0a014543eca 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; @@ -82,6 +83,62 @@ import java.util.concurrent.atomic.AtomicInteger; */ public abstract class IndexReader implements Cloneable,Closeable { + /** + * A custom listener that's invoked when the IndexReader + * is finished. + * + *

    For a SegmentReader, this listener is called only + * once all SegmentReaders sharing the same core are + * closed. At this point it is safe for apps to evict + * this reader from any caches keyed on {@link + * #getCoreCacheKey}. This is the same interface that + * {@link FieldCache} uses, internally, to evict + * entries.

    + * + *

    For other readers, this listener is called when they + * are closed.
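A minimal usage sketch (not part of the patch) of the listener described above: an application-level cache keyed on getCoreCacheKey() can be evicted once a reader core is finished. The perCoreCache map and the helper method are hypothetical; only addReaderFinishedListener, finished and getCoreCacheKey come from this API.

    // assumes: import java.util.concurrent.ConcurrentHashMap;
    //          import org.apache.lucene.index.IndexReader;
    static void installEviction(IndexReader reader,
                                final ConcurrentHashMap<Object,Object> perCoreCache) {
      reader.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
        public void finished(IndexReader closed) {
          // The core (or reader) will not be used again; drop its cached entries.
          perCoreCache.remove(closed.getCoreCacheKey());
        }
      });
    }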

    + * + * @lucene.experimental + */ + public static interface ReaderFinishedListener { + public void finished(IndexReader reader); + } + + // Impls must set this if they may call add/removeReaderFinishedListener: + protected volatile Collection readerFinishedListeners; + + /** Expert: adds a {@link ReaderFinishedListener}. The + * provided listener is also added to any sub-readers, if + * this is a composite reader. Also, any reader reopened + * or cloned from this one will also copy the listeners at + * the time of reopen. + * + * @lucene.experimental */ + public void addReaderFinishedListener(ReaderFinishedListener listener) { + readerFinishedListeners.add(listener); + } + + /** Expert: remove a previously added {@link ReaderFinishedListener}. + * + * @lucene.experimental */ + public void removeReaderFinishedListener(ReaderFinishedListener listener) { + readerFinishedListeners.remove(listener); + } + + protected void notifyReaderFinishedListeners() { + // Defensive (should never be null -- all impls must set + // this): + if (readerFinishedListeners != null) { + for(ReaderFinishedListener listener : readerFinishedListeners) { + listener.finished(this); + } + } + } + + protected void readerFinished() { + notifyReaderFinishedListeners(); + } + /** * Constants describing field properties, for example used for * {@link IndexReader#getFieldNames(FieldOption)}. @@ -199,6 +256,7 @@ public abstract class IndexReader implements Cloneable,Closeable { refCount.incrementAndGet(); } } + readerFinished(); } } @@ -242,24 +300,26 @@ public abstract class IndexReader implements Cloneable,Closeable { /** * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}. * - * * @param writer The IndexWriter to open from + * @param applyAllDeletes If true, all buffered deletes will + * be applied (made visible) in the returned reader. If + * false, the deletes are not applied but remain buffered + * (in IndexWriter) so that they will be applied in the + * future. Applying deletes can be costly, so if your app + * can tolerate deleted documents being returned you might + * gain some performance by passing false. * @return The new IndexReader * @throws CorruptIndexException * @throws IOException if there is a low-level IO error * - * @see #reopen(IndexWriter) + * @see #reopen(IndexWriter,boolean) * * @lucene.experimental */ - public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException { - return writer.getReader(); + public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { + return writer.getReader(applyAllDeletes); } - - - - /** Expert: returns an IndexReader reading the index in the given * {@link IndexCommit}. You should pass readOnly=true, since it * gives much better concurrent performance, unless you @@ -305,7 +365,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting + * IndexWriterConfig#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. 
When set to N, then one in every * N*termIndexInterval terms in the index is loaded into @@ -355,14 +415,17 @@ public abstract class IndexReader implements Cloneable,Closeable { * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting + * IndexWriterConfig#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. When set to N, then one in every * N*termIndexInterval terms in the index is loaded into * memory. By setting this to a value > 1 you can reduce * memory usage, at the expense of higher latency when * loading a TermInfo. The default value is 1. Set this - * to -1 to skip loading the terms index entirely. + * to -1 to skip loading the terms index entirely. This is only useful in + * advanced situations when you will only .next() through all terms; + * attempts to seek will hit an exception. + * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -384,7 +447,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting + * IndexWriterConfig#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. When set to N, then one in every * N*termIndexInterval terms in the index is loaded into @@ -417,7 +480,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting + * IndexWriterConfig#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. When set to N, then one in every * N*termIndexInterval terms in the index is loaded into @@ -546,7 +609,7 @@ public abstract class IndexReader implements Cloneable,Closeable { * file descriptors, CPU time) will be consumed.
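Purely for illustration of the termInfosIndexDivisor parameter documented in these hunks, a sketch assuming the open(Directory, IndexDeletionPolicy, boolean, int) overload and an existing Directory named dir:

    // readOnly=true, divisor=4: only one in every 4*termIndexInterval indexed
    // terms is loaded into RAM, trading memory for slower term seeks.
    IndexReader divisorReader = IndexReader.open(dir, null, true, 4);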

    * *

    For lower latency on reopening a reader, you should - * call {@link #setMergedSegmentWarmer} to + * call {@link IndexWriterConfig#setMergedSegmentWarmer} to * pre-warm a newly merged segment before it's committed * to the index. This is important for minimizing * index-to-search delay after a large merge.
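A hedged sketch of that warming hook; the Version constant, analyzer and the warm-up work are placeholders, while setMergedSegmentWarmer and IndexWriter.IndexReaderWarmer follow the javadoc referenced in this hunk:

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      public void warm(IndexReader merged) throws IOException {
        // Touch the newly merged segment (terms index, norms, caches) so the
        // first NRT reader that includes it does not pay the warm-up cost.
        merged.terms("id");
      }
    });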

    @@ -561,18 +624,26 @@ public abstract class IndexReader implements Cloneable,Closeable { * if you attempt to reopen any of those readers, you'll * hit an {@link AlreadyClosedException}.
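The applyAllDeletes trade-off from these javadocs as a short sketch; writer is assumed to be an already open IndexWriter:

    // Cheap refresh: buffered deletes stay buffered, so searches against this
    // reader may still return deleted documents.
    IndexReader nrtReader = IndexReader.open(writer, false);

    // ... further adds/deletes through the same writer ...

    // Costlier refresh that makes the buffered deletes visible:
    IndexReader newReader = nrtReader.reopen(writer, true);
    if (newReader != nrtReader) {
      nrtReader.close();   // the caller still owns, and must close, the old reader
      nrtReader = newReader;
    }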

    * - * @lucene.experimental - * * @return IndexReader that covers entire index plus all * changes made so far by this IndexWriter instance * + * @param writer The IndexWriter to open from + * @param applyAllDeletes If true, all buffered deletes will + * be applied (made visible) in the returned reader. If + * false, the deletes are not applied but remain buffered + * (in IndexWriter) so that they will be applied in the + * future. Applying deletes can be costly, so if your app + * can tolerate deleted documents being returned you might + * gain some performance by passing false. + * * @throws IOException + * + * @lucene.experimental */ - public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException { - return writer.getReader(); + public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { + return writer.getReader(applyAllDeletes); } - /** * Efficiently clones the IndexReader (sharing most * internal state). @@ -935,14 +1006,6 @@ public abstract class IndexReader implements Cloneable,Closeable { */ public abstract byte[] norms(String field) throws IOException; - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. - * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract void norms(String field, byte[] bytes, int offset) - throws IOException; - /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, @@ -974,26 +1037,6 @@ public abstract class IndexReader implements Cloneable,Closeable { protected abstract void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException; - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNormValue(byte) - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public void setNorm(int doc, String field, float value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); - } - /** Flex API: returns {@link Fields} for this reader. * This method may return null if the reader has no * postings. @@ -1029,6 +1072,23 @@ public abstract class IndexReader implements Cloneable,Closeable { return terms.docFreq(term); } + /** Returns the number of documents containing the term + * t. This method returns 0 if the term or + * field does not exists. This method does not take into + * account deleted documents that have not yet been merged + * away. 
*/ + public long totalTermFreq(String field, BytesRef term) throws IOException { + final Fields fields = fields(); + if (fields == null) { + return 0; + } + final Terms terms = fields.terms(field); + if (terms == null) { + return 0; + } + return terms.totalTermFreq(term); + } + /** This may return null if the field does not exist.*/ public Terms terms(String field) throws IOException { final Fields fields = fields(); @@ -1074,6 +1134,47 @@ public abstract class IndexReader implements Cloneable,Closeable { return null; } } + + /** + * Returns {@link DocsEnum} for the specified field and + * {@link TermState}. This may return null, if either the field or the term + * does not exists or the {@link TermState} is invalid for the underlying + * implementation.*/ + public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, term, state, null); + } else { + return null; + } + } + + /** + * Returns {@link DocsAndPositionsEnum} for the specified field and + * {@link TermState}. This may return null, if either the field or the term + * does not exists, the {@link TermState} is invalid for the underlying + * implementation, or positions were not stored for this term.*/ + public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, state, null); + } else { + return null; + } + } + /** Deletes the document numbered docNum. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations. @@ -1137,7 +1238,16 @@ public abstract class IndexReader implements Cloneable,Closeable { return n; } - /** Undeletes all documents currently marked as deleted in this index. + /** Undeletes all documents currently marked as deleted in + * this index. + * + *

    NOTE: this method can only recover documents marked + * for deletion but not yet removed from the index; when + * and how Lucene removes deleted documents is an + * implementation detail, subject to change from release + * to release. However, you can use {@link + * #numDeletedDocs} on the current IndexReader instance to + * see how many documents will be un-deleted. * * @throws StaleReaderException if the index has changed * since this reader was opened @@ -1360,9 +1470,7 @@ public abstract class IndexReader implements Cloneable,Closeable { } /** Expert: returns the sequential sub readers that this - * reader is logically composed of. For example, - * IndexSearcher uses this API to drive searching by one - * sub reader at a time. If this reader is not composed + * reader is logically composed of. If this reader is not composed * of sequential child readers, it should return null. * If this method returns an empty array, that means this * reader is a null reader (for example a MultiReader @@ -1377,12 +1485,33 @@ public abstract class IndexReader implements Cloneable,Closeable { public IndexReader[] getSequentialSubReaders() { return null; } - - - /** Expert: returns the docID base for this subReader. */ - public int getSubReaderDocBase(IndexReader subReader) { - throw new UnsupportedOperationException(); - } + + /** + * Expert: Returns a the root {@link ReaderContext} for this + * {@link IndexReader}'s sub-reader tree. Iff this reader is composed of sub + * readers ,ie. this reader being a composite reader, this method returns a + * {@link CompositeReaderContext} holding the reader's direct children as well as a + * view of the reader tree's atomic leaf contexts. All sub- + * {@link ReaderContext} instances referenced from this readers top-level + * context are private to this reader and are not shared with another context + * tree. For example, IndexSearcher uses this API to drive searching by one + * atomic leaf reader at a time. If this reader is not composed of child + * readers, this method returns an {@link AtomicReaderContext}. + *
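A sketch of walking the context tree this method returns; reader is any IndexReader and the per-leaf work is illustrative:

    IndexReader.ReaderContext top = reader.getTopReaderContext();
    IndexReader.AtomicReaderContext[] leaves = top.leaves();
    if (leaves == null) {
      // the reader is atomic: work directly against top.reader
    } else {
      for (IndexReader.AtomicReaderContext leaf : leaves) {
        // per-segment work against leaf.reader; add leaf.docBase to rebase
        // segment-local doc ids to top-level doc ids
      }
    }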

    + * Note: Any of the sub-{@link CompositeReaderContext} instances referenced from this + * top-level context hold a null {@link CompositeReaderContext#leaves} + * reference. Only the top-level context maintains the convenience leaf-view + * for performance reasons. + *

    + * NOTE: You should not try using sub-readers returned by this method to make + * any changes (setNorm, deleteDocument, etc.). While this might succeed for + * one composite reader (like MultiReader), it will most likely lead to index + * corruption for other readers (like DirectoryReader obtained through + * {@link #open}. Use the top-level context's reader directly. + * + * @lucene.experimental + */ + public abstract ReaderContext getTopReaderContext(); /** Expert */ public Object getCoreCacheKey() { @@ -1442,4 +1571,132 @@ public abstract class IndexReader implements Cloneable,Closeable { Fields retrieveFields() { return fields; } + + /** + * A struct like class that represents a hierarchical relationship between + * {@link IndexReader} instances. + * @lucene.experimental + */ + public static abstract class ReaderContext { + /** The reader context for this reader's immediate parent, or null if none */ + public final ReaderContext parent; + /** The actual reader */ + public final IndexReader reader; + /** true iff the reader is an atomic reader */ + public final boolean isAtomic; + /** true if this context struct represents the top level reader within the hierarchical context */ + public final boolean isTopLevel; + /** the doc base for this reader in the parent, 0 if parent is null */ + public final int docBaseInParent; + /** the ord for this reader in the parent, 0 if parent is null */ + public final int ordInParent; + + ReaderContext(ReaderContext parent, IndexReader reader, + boolean isAtomic, int ordInParent, int docBaseInParent) { + this.parent = parent; + this.reader = reader; + this.isAtomic = isAtomic; + this.docBaseInParent = docBaseInParent; + this.ordInParent = ordInParent; + this.isTopLevel = parent==null; + } + + /** + * Returns the context's leaves if this context is a top-level context + * otherwise null. + *

    + * Note: this is a convenience method since leaves can always be obtained by + * walking the context tree. + */ + public AtomicReaderContext[] leaves() { + return null; + } + + /** + * Returns the context's children iff this context is a composite context + * otherwise null. + *

    + * Note: this method is a convenience method to prevent + * instanceof checks and type-casts to + * {@link CompositeReaderContext}. + */ + public ReaderContext[] children() { + return null; + } + } + + /** + * {@link ReaderContext} for composite {@link IndexReader} instance. + * @lucene.experimental + */ + public static final class CompositeReaderContext extends ReaderContext { + /** the composite readers immediate children */ + public final ReaderContext[] children; + /** the composite readers leaf reader contexts if this is the top level reader in this context */ + public final AtomicReaderContext[] leaves; + + /** + * Creates a {@link CompositeReaderContext} for intermediate readers that aren't + * not top-level readers in the current context + */ + public CompositeReaderContext(ReaderContext parent, IndexReader reader, + int ordInParent, int docbaseInParent, ReaderContext[] children) { + this(parent, reader, ordInParent, docbaseInParent, children, null); + } + + /** + * Creates a {@link CompositeReaderContext} for top-level readers with parent set to null + */ + public CompositeReaderContext(IndexReader reader, ReaderContext[] children, AtomicReaderContext[] leaves) { + this(null, reader, 0, 0, children, leaves); + } + + private CompositeReaderContext(ReaderContext parent, IndexReader reader, + int ordInParent, int docbaseInParent, ReaderContext[] children, + AtomicReaderContext[] leaves) { + super(parent, reader, false, ordInParent, docbaseInParent); + this.children = children; + this.leaves = leaves; + } + + @Override + public AtomicReaderContext[] leaves() { + return leaves; + } + + + @Override + public ReaderContext[] children() { + return children; + } + } + + /** + * {@link ReaderContext} for atomic {@link IndexReader} instances + * @lucene.experimental + */ + public static final class AtomicReaderContext extends ReaderContext { + /** The readers ord in the top-level's leaves array */ + public final int ord; + /** The readers absolute doc base */ + public final int docBase; + /** + * Creates a new {@link AtomicReaderContext} + */ + public AtomicReaderContext(ReaderContext parent, IndexReader reader, + int ord, int docBase, int leafOrd, int leafDocBase) { + super(parent, reader, true, ord, docBase); + assert reader.getSequentialSubReaders() == null : "Atomic readers must not have subreaders"; + this.ord = leafOrd; + this.docBase = leafDocBase; + } + + /** + * Creates a new {@link AtomicReaderContext} for a atomic reader without an immediate + * parent. + */ + public AtomicReaderContext(IndexReader atomicReader) { + this(null, atomicReader, 0, 0, 0, 0); + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 43b0281f088..44d909265b3 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; @@ -47,6 +48,7 @@ import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util.MapBackedSet; /** An IndexWriter creates and maintains an index. 
@@ -214,7 +216,6 @@ public class IndexWriter implements Closeable { private long lastCommitChangeCount; // last changeCount that was committed private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - private HashMap rollbackSegments; volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) volatile long pendingCommitChangeCount; @@ -250,7 +251,7 @@ public class IndexWriter implements Closeable { private final AtomicInteger flushDeletesCount = new AtomicInteger(); final ReaderPool readerPool = new ReaderPool(); - final BufferedDeletes bufferedDeletes; + final BufferedDeletesStream bufferedDeletesStream; // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and @@ -270,6 +271,13 @@ public class IndexWriter implements Closeable { // The PayloadProcessorProvider to use when segments are merged private PayloadProcessorProvider payloadProcessorProvider; + // for testing + boolean anyNonBulkMerges; + + IndexReader getReader() throws IOException { + return getReader(true); + } + /** * Expert: returns a readonly reader, covering all * committed as well as un-committed changes to the index. @@ -329,9 +337,10 @@ public class IndexWriter implements Closeable { * * @throws IOException */ - IndexReader getReader() throws IOException { - + IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); + + final long tStart = System.currentTimeMillis(); if (infoStream != null) { message("flush at getReader"); @@ -347,17 +356,27 @@ public class IndexWriter implements Closeable { // just like we do when loading segments_N IndexReader r; synchronized(this) { - flush(false, true); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs); + flush(false, applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); if (infoStream != null) { message("return reader version=" + r.getVersion() + " reader=" + r); } } maybeMerge(); + if (infoStream != null) { + message("getReader took " + (System.currentTimeMillis() - tStart) + " msec"); + } return r; } + // Used for all SegmentReaders we open + private final Collection readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + + Collection getReaderFinishedListeners() throws IOException { + return readerFinishedListeners; + } + /** Holds shared SegmentReader instances. IndexWriter uses * SegmentReaders for 1) applying deletes, 2) doing * merges, 3) handing out a real-time reader. This pool @@ -567,6 +586,7 @@ public class IndexWriter implements Closeable { // synchronized // Returns a ref, which we xfer to readerMap: sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor); + sr.readerFinishedListeners = readerFinishedListeners; if (info.dir == directory) { // Only pool if reader is not external @@ -605,8 +625,6 @@ public class IndexWriter implements Closeable { } } - - /** * Obtain the number of deleted docs for a pooled reader. * If the reader isn't being pooled, the segmentInfo's @@ -662,16 +680,13 @@ public class IndexWriter implements Closeable { * IndexWriter. Additionally, calling {@link #getConfig()} and changing the * parameters does not affect that IndexWriter instance. *

    - * NOTE: by default, {@link IndexWriterConfig#getMaxFieldLength()} - * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to - * whether this setting fits your application. * * @param d * the index directory. The index is either created or appended * according conf.getOpenMode(). * @param conf * the configuration settings according to which IndexWriter should - * be initalized. + * be initialized. * @throws CorruptIndexException * if the index is corrupt * @throws LockObtainFailedException @@ -689,7 +704,6 @@ public class IndexWriter implements Closeable { directory = d; analyzer = conf.getAnalyzer(); infoStream = defaultInfoStream; - maxFieldLength = conf.getMaxFieldLength(); termIndexInterval = conf.getTermIndexInterval(); mergePolicy = conf.getMergePolicy(); mergePolicy.setIndexWriter(this); @@ -697,8 +711,8 @@ public class IndexWriter implements Closeable { mergedSegmentWarmer = conf.getMergedSegmentWarmer(); codecs = conf.getCodecProvider(); - bufferedDeletes = new BufferedDeletes(messageID); - bufferedDeletes.setInfoStream(infoStream); + bufferedDeletesStream = new BufferedDeletesStream(messageID); + bufferedDeletesStream.setInfoStream(infoStream); poolReaders = conf.getReaderPooling(); OpenMode mode = conf.getOpenMode(); @@ -719,11 +733,8 @@ public class IndexWriter implements Closeable { boolean success = false; - // TODO: we should check whether this index is too old, - // and throw an IndexFormatTooOldExc up front, here, - // instead of later when merge, applyDeletes, getReader - // is attempted. I think to do this we should store the - // oldest segment's version in segments_N. + // If index is too old, reading the segments will throw + // IndexFormatTooOldException. segmentInfos = new SegmentInfos(codecs); try { if (create) { @@ -766,9 +777,8 @@ public class IndexWriter implements Closeable { setRollbackSegmentInfos(segmentInfos); - docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes); + docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream); docWriter.setInfoStream(infoStream); - docWriter.setMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: @@ -854,10 +864,6 @@ public class IndexWriter implements Closeable { private synchronized void setRollbackSegmentInfos(SegmentInfos infos) { rollbackSegmentInfos = (SegmentInfos) infos.clone(); - rollbackSegments = new HashMap(); - final int size = rollbackSegmentInfos.size(); - for(int i=0;iabove for details.
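As a point of reference for the constructor javadoc edited above, here is a minimal usage sketch of the config-based IndexWriter constructor. The path, analyzer, and Version constant are illustrative choices rather than anything mandated by the patch, and maxFieldLength is intentionally absent since this change removes it from IndexWriterConfig.

    import java.io.File;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class OpenWriterSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/example-index"));  // illustrative path
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
            new StandardAnalyzer(Version.LUCENE_CURRENT))
            .setOpenMode(OpenMode.CREATE_OR_APPEND);      // index created or appended per conf.getOpenMode()
        IndexWriter writer = new IndexWriter(dir, conf);  // constructor documented in the hunk above
        // ... addDocument(...) calls would go here ...
        writer.close();
      }
    }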

    * + *

    NOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. + * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @see MergePolicy#findMergesForOptimize @@ -1682,6 +1674,11 @@ public class IndexWriter implements Closeable { *

    NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.

    + * + *

    NOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. */ public void expungeDeletes(boolean doWait) throws CorruptIndexException, IOException { @@ -1832,6 +1829,18 @@ public class IndexWriter implements Closeable { } } + /** Expert: to be used by a {@link MergePolicy} to avoid + * selecting merges for segments already being merged. + * The returned collection is not cloned, and thus is + * only safe to access if you hold IndexWriter's lock + * (which you do when IndexWriter invokes the + * MergePolicy). + * + *

    Do not alter the returned collection! */ + public synchronized Collection getMergingSegments() { + return mergingSegments; + } + /** Expert: the {@link MergeScheduler} calls this method * to retrieve the next merge requested by the * MergePolicy */ @@ -1889,7 +1898,7 @@ public class IndexWriter implements Closeable { mergePolicy.close(); mergeScheduler.close(); - bufferedDeletes.clear(); + bufferedDeletesStream.clear(); synchronized(this) { @@ -1952,8 +1961,9 @@ public class IndexWriter implements Closeable { * *
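The getMergingSegments() accessor added above is intended for MergePolicy implementations. A hedged sketch of the call pattern from a hypothetical LogMergePolicy subclass follows; the helper method name is made up, while writer.get() and getMergingSegments() appear in this patch. As the javadoc warns, this is only safe while IndexWriter holds its lock, i.e. while it is invoking the merge policy.

    // Illustrative helper inside a hypothetical LogMergePolicy subclass:
    // skip any segment that IndexWriter reports as already being merged.
    private List<SegmentInfo> segmentsNotAlreadyMerging(SegmentInfos infos) {
      final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
      final List<SegmentInfo> candidates = new ArrayList<SegmentInfo>();
      for (int i = 0; i < infos.size(); i++) {
        final SegmentInfo info = infos.info(i);
        if (!merging.contains(info)) {   // not part of a running merge
          candidates.add(info);
        }
      }
      return candidates;
    }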

    NOTE: this method will forcefully abort all merges * in progress. If other threads are running {@link - * #optimize()} or any of the addIndexes methods, they - * will receive {@link MergePolicy.MergeAbortedException}s. + * #optimize()}, {@link #addIndexes(IndexReader[])} or + * {@link #expungeDeletes} methods, they may receive + * {@link MergePolicy.MergeAbortedException}s. */ public synchronized void deleteAll() throws IOException { try { @@ -2042,12 +2052,19 @@ public class IndexWriter implements Closeable { * will have completed once this method completes.

    */ public synchronized void waitForMerges() { + if (infoStream != null) { + message("waitForMerges"); + } while(pendingMerges.size() > 0 || runningMerges.size() > 0) { doWait(); } // sanity check assert 0 == mergingSegments.size(); + + if (infoStream != null) { + message("waitForMerges done"); + } } /** @@ -2226,6 +2243,11 @@ public class IndexWriter implements Closeable { * you should immediately close the writer. See above for details.

    * + *

    NOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. + * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -2453,13 +2475,13 @@ public class IndexWriter implements Closeable { } /** - * Flush all in-memory buffered udpates (adds and deletes) + * Flush all in-memory buffered updates (adds and deletes) * to the Directory. * @param triggerMerge if true, we may merge segments (if * deletes or docs were flushed) if necessary - * @param flushDeletes whether pending deletes should also + * @param applyAllDeletes whether pending deletes should also */ - protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException { + protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException { // NOTE: this method cannot be sync'd because // maybeMerge() in turn calls mergeScheduler.merge which @@ -2470,7 +2492,7 @@ public class IndexWriter implements Closeable { // We can be called during close, when closing==true, so we must pass false to ensureOpen: ensureOpen(false); - if (doFlush(flushDeletes) && triggerMerge) { + if (doFlush(applyAllDeletes) && triggerMerge) { maybeMerge(); } } @@ -2519,10 +2541,10 @@ public class IndexWriter implements Closeable { // tiny segments: if (flushControl.getFlushDeletes() || (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH && - bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { + bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { applyAllDeletes = true; if (infoStream != null) { - message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); + message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); } } } @@ -2532,12 +2554,15 @@ public class IndexWriter implements Closeable { message("apply all deletes during flush"); } flushDeletesCount.incrementAndGet(); - if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) { + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos); + if (result.anyDeletes) { checkpoint(); } + bufferedDeletesStream.prune(segmentInfos); + assert !bufferedDeletesStream.any(); flushControl.clearDeletes(); } else if (infoStream != null) { - message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed()); + message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); } doAfterFlush(); @@ -2563,7 +2588,7 @@ public class IndexWriter implements Closeable { */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.bytesUsed() + bufferedDeletes.bytesUsed(); + return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed(); } /** Expert: Return the number of documents currently @@ -2573,28 +2598,12 @@ public class IndexWriter implements Closeable { return docWriter.getNumDocs(); } - private int ensureContiguousMerge(MergePolicy.OneMerge merge) { - - int first = segmentInfos.indexOf(merge.segments.info(0)); - if (first == -1) - throw new MergePolicy.MergeException("could not find segment " + 
merge.segments.info(0).name + " in current index " + segString(), directory); - - final int numSegments = segmentInfos.size(); - - final int numSegmentsToMerge = merge.segments.size(); - for(int i=0;i= numSegments || !segmentInfos.info(first+i).equals(info)) { - if (segmentInfos.indexOf(info) == -1) - throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory); - else - throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle", - directory); + private void ensureValidMerge(MergePolicy.OneMerge merge) { + for(SegmentInfo info : merge.segments) { + if (segmentInfos.indexOf(info) == -1) { + throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory); } } - - return first; } /** Carefully merges deletes for the segments we just @@ -2619,9 +2628,11 @@ public class IndexWriter implements Closeable { // started merging: int docUpto = 0; int delCount = 0; + long minGen = Long.MAX_VALUE; for(int i=0; i < sourceSegments.size(); i++) { SegmentInfo info = sourceSegments.info(i); + minGen = Math.min(info.getBufferedDeletesGen(), minGen); int docCount = info.docCount; SegmentReader previousReader = merge.readersClone[i]; final Bits prevDelDocs = previousReader.getDeletedDocs(); @@ -2672,9 +2683,17 @@ public class IndexWriter implements Closeable { assert mergedReader.numDeletedDocs() == delCount; mergedReader.hasChanges = delCount > 0; + + // If new deletes were applied while we were merging + // (which happens if eg commit() or getReader() is + // called during our merge), then it better be the case + // that the delGen has increased for all our merged + // segments: + assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen(); + + mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen); } - /* FIXME if we want to support non-contiguous segment merges */ synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException { assert testPoint("startCommitMerge"); @@ -2700,7 +2719,7 @@ public class IndexWriter implements Closeable { return false; } - final int start = ensureContiguousMerge(merge); + ensureValidMerge(merge); commitMergedDeletes(merge, mergedReader); @@ -2710,10 +2729,32 @@ public class IndexWriter implements Closeable { // format as well: setMergeDocStoreIsCompoundFile(merge); - segmentInfos.subList(start, start + merge.segments.size()).clear(); assert !segmentInfos.contains(merge.info); - segmentInfos.add(start, merge.info); - + + final Set mergedAway = new HashSet(merge.segments); + int segIdx = 0; + int newSegIdx = 0; + boolean inserted = false; + final int curSegCount = segmentInfos.size(); + while(segIdx < curSegCount) { + final SegmentInfo info = segmentInfos.info(segIdx++); + if (mergedAway.contains(info)) { + if (!inserted) { + segmentInfos.set(segIdx-1, merge.info); + inserted = true; + newSegIdx++; + } + } else { + segmentInfos.set(newSegIdx++, info); + } + } + assert newSegIdx == curSegCount - merge.segments.size() + 1; + segmentInfos.subList(newSegIdx, segmentInfos.size()).clear(); + + if (infoStream != null) { + message("after commit: " + segString()); + } + closeMergeReaders(merge, false); // Must note the change to segmentInfos so any commits @@ -2725,16 +2766,12 @@ public 
class IndexWriter implements Closeable { // disk, updating SegmentInfo, etc.: readerPool.clear(merge.segments); - // remove pending deletes of the segments - // that were merged, moving them onto the segment just - // before the merged segment - // Lock order: IW -> BD - bufferedDeletes.commitMerge(merge); - if (merge.optimize) { // cascade the optimize: segmentsToOptimize.add(merge.info); } + + return true; } @@ -2862,7 +2899,7 @@ public class IndexWriter implements Closeable { } } - ensureContiguousMerge(merge); + ensureValidMerge(merge); pendingMerges.add(merge); @@ -2889,10 +2926,6 @@ public class IndexWriter implements Closeable { final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException { boolean success = false; try { - // Lock order: IW -> BD - if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) { - checkpoint(); - } _mergeInit(merge); success = true; } finally { @@ -2916,6 +2949,9 @@ public class IndexWriter implements Closeable { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge"); } + // TODO: is there any perf benefit to sorting + // merged segments? eg biggest to smallest? + if (merge.info != null) // mergeInit already done return; @@ -2928,6 +2964,17 @@ public class IndexWriter implements Closeable { // names. merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false); + // Lock order: IW -> BD + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments); + if (result.anyDeletes) { + checkpoint(); + } + + merge.info.setBufferedDeletesGen(result.gen); + + // Lock order: IW -> BD + bufferedDeletesStream.prune(segmentInfos); + Map details = new HashMap(); details.put("optimize", Boolean.toString(merge.optimize)); details.put("mergeFactor", Integer.toString(merge.segments.size())); @@ -3115,6 +3162,7 @@ public class IndexWriter implements Closeable { message("merge segmentCodecs=" + merger.getSegmentCodecs()); message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments); } + anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments; assert mergedDocCount == totDocCount; @@ -3280,7 +3328,7 @@ public class IndexWriter implements Closeable { // NOTE: the callers of this method should in theory // be able to do simply wait(), but, as a defense // against thread timing hazards where notifyAll() - // falls to be called, we wait for at most 1 second + // fails to be called, we wait for at most 1 second // and then return so caller can check if wait // conditions are satisfied: try { @@ -3290,6 +3338,15 @@ public class IndexWriter implements Closeable { } } + private boolean keepFullyDeletedSegments; + + /** Only for testing. 
+ * + * @lucene.internal */ + void keepFullyDeletedSegments() { + keepFullyDeletedSegments = true; + } + // called only from assert private boolean filesExist(SegmentInfos toSync) throws IOException { Collection files = toSync.files(directory, false); @@ -3348,6 +3405,10 @@ public class IndexWriter implements Closeable { readerPool.commit(); toSync = (SegmentInfos) segmentInfos.clone(); + if (!keepFullyDeletedSegments) { + toSync.pruneDeletedSegments(); + } + assert filesExist(toSync); if (commitUserData != null) @@ -3477,7 +3538,7 @@ public class IndexWriter implements Closeable { } synchronized boolean nrtIsCurrent(SegmentInfos infos) { - return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any(); + return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any(); } synchronized boolean isClosed() { @@ -3644,7 +3705,7 @@ public class IndexWriter implements Closeable { final double ramBufferSizeMB = config.getRAMBufferSizeMB(); if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { final long limit = (long) (ramBufferSizeMB*1024*1024); - long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed(); + long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); if (used >= limit) { // DocumentsWriter may be able to free up some @@ -3652,7 +3713,7 @@ public class IndexWriter implements Closeable { // Lock order: FC -> DW docWriter.balanceRAM(); - used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed(); + used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); if (used >= limit) { return setFlushPending("ram full: " + reason, false); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java index 34240ea5f2d..812306cf4e8 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.DocumentsWriter.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.util.Version; /** @@ -41,8 +42,6 @@ import org.apache.lucene.util.Version; */ public final class IndexWriterConfig implements Cloneable { - public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE; - /** * Specifies the open mode for {@link IndexWriter}: *

      @@ -55,7 +54,7 @@ public final class IndexWriterConfig implements Cloneable { public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ - public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here + public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here /** Denotes a flush trigger is disabled. */ public final static int DISABLE_AUTO_FLUSH = -1; @@ -113,8 +112,7 @@ public final class IndexWriterConfig implements Cloneable { private IndexDeletionPolicy delPolicy; private IndexCommit commit; private OpenMode openMode; - private int maxFieldLength; - private Similarity similarity; + private SimilarityProvider similarityProvider; private int termIndexInterval; // TODO: this should be private to the codec, not settable here private MergeScheduler mergeScheduler; private long writeLockTimeout; @@ -145,8 +143,7 @@ public final class IndexWriterConfig implements Cloneable { delPolicy = new KeepOnlyLastCommitDeletionPolicy(); commit = null; openMode = OpenMode.CREATE_OR_APPEND; - maxFieldLength = UNLIMITED_FIELD_LENGTH; - similarity = Similarity.getDefault(); + similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here mergeScheduler = new ConcurrentMergeScheduler(); writeLockTimeout = WRITE_LOCK_TIMEOUT; @@ -219,37 +216,6 @@ public final class IndexWriterConfig implements Cloneable { return delPolicy; } - /** - * The maximum number of terms that will be indexed for a single field in a - * document. This limits the amount of memory required for indexing, so that - * collections with very large files will not crash the indexing process by - * running out of memory. This setting refers to the number of running terms, - * not to the number of different terms. - *

      - * NOTE: this silently truncates large documents, excluding from the - * index all terms that occur further in the document. If you know your source - * documents are large, be sure to set this value high enough to accomodate - * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then - * the only limit is your memory, but you should anticipate an - * OutOfMemoryError. - *

      - * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}. - */ - public IndexWriterConfig setMaxFieldLength(int maxFieldLength) { - this.maxFieldLength = maxFieldLength; - return this; - } - - /** - * Returns the maximum number of terms that will be indexed for a single field - * in a document. - * - * @see #setMaxFieldLength(int) - */ - public int getMaxFieldLength() { - return maxFieldLength; - } - /** * Expert: allows to open a certain commit point. The default is null which * opens the latest commit point. @@ -269,25 +235,22 @@ public final class IndexWriterConfig implements Cloneable { } /** - * Expert: set the {@link Similarity} implementation used by this IndexWriter. + * Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter. *

      - * NOTE: the similarity cannot be null. If null is passed, - * the similarity will be set to the default. - * - * @see Similarity#setDefault(Similarity) + * NOTE: the similarity provider cannot be null. If null is passed, + * the similarity provider will be set to the default implementation (unspecified). */ - public IndexWriterConfig setSimilarity(Similarity similarity) { - this.similarity = similarity == null ? Similarity.getDefault() : similarity; + public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider; return this; } /** - * Expert: returns the {@link Similarity} implementation used by this - * IndexWriter. This defaults to the current value of - * {@link Similarity#getDefault()}. + * Expert: returns the {@link SimilarityProvider} implementation used by this + * IndexWriter. */ - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } /** @@ -589,10 +552,13 @@ public final class IndexWriterConfig implements Cloneable { /** Sets the termsIndexDivisor passed to any readers that * IndexWriter opens, for example when applying deletes * or creating a near-real-time reader in {@link - * IndexWriter#getReader}. */ + * IndexWriter#getReader}. If you pass -1, the terms index + * won't be loaded by the readers. This is only useful in + * advanced situations when you will only .next() through + * all terms; attempts to seek will hit an exception. */ public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) { - if (divisor <= 0) { - throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")"); + if (divisor <= 0 && divisor != -1) { + throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")"); } readerTermsIndexDivisor = divisor; return this; @@ -611,8 +577,7 @@ public final class IndexWriterConfig implements Cloneable { sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); sb.append("openMode=").append(openMode).append("\n"); - sb.append("maxFieldLength=").append(maxFieldLength).append("\n"); - sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); + sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n"); sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n"); diff --git a/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java index 520cb4a8f16..7ef2902099f 100644 --- a/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java @@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy extends LogMergePolicy { * or larger will never be merged. @see setMaxMergeMB */ public static final double DEFAULT_MAX_MERGE_MB = 2048; + /** Default maximum segment size. A segment of this size + * or larger will never be merged during optimize. 
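The IndexWriterConfig hunk above also relaxes setReaderTermsIndexDivisor to accept -1. A brief sketch of that option, reusing the conf and analyzer names from the earlier writer sketch (so this is a fragment, not a complete program):

    // With -1, readers opened by IndexWriter (e.g. for applying deletes or
    // near-real-time readers) do not load the terms index: terms can only be
    // stepped through with next(), and seeks will hit an exception.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        .setReaderTermsIndexDivisor(-1);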
@see setMaxMergeMBForOptimize */ + public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE; + public LogByteSizeMergePolicy() { minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024); maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024); + maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024); } @Override @@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy extends LogMergePolicy { return ((double) maxMergeSize)/1024/1024; } + /**

      Determines the largest segment (measured by total + * byte size of the segment's files, in MB) that may be + * merged with other segments during optimize. Setting + * it low will leave the index with more than 1 segment, + * even if {@link IndexWriter#optimize()} is called.*/ + public void setMaxMergeMBForOptimize(double mb) { + maxMergeSizeForOptimize = (long) (mb*1024*1024); + } + + /** Returns the largest segment (measured by total byte + * size of the segment's files, in MB) that may be merged + * with other segments during optimize. + * @see #setMaxMergeMBForOptimize */ + public double getMaxMergeMBForOptimize() { + return ((double) maxMergeSizeForOptimize)/1024/1024; + } + /** Sets the minimum size for the lowest level segments. * Any segments below this size are considered to be on * the same level (even if they vary drastically in size) diff --git a/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java index a86111c3f46..42ec5136145 100644 --- a/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java @@ -31,9 +31,10 @@ public class LogDocMergePolicy extends LogMergePolicy { public LogDocMergePolicy() { minMergeSize = DEFAULT_MIN_MERGE_DOCS; - // maxMergeSize is never used by LogDocMergePolicy; set + // maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set // it to Long.MAX_VALUE to disable it maxMergeSize = Long.MAX_VALUE; + maxMergeSizeForOptimize = Long.MAX_VALUE; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java index 357460c1d9d..1925a78d74d 100644 --- a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java @@ -18,6 +18,11 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; import java.util.Set; /**

      This class implements a {@link MergePolicy} that tries @@ -63,7 +68,11 @@ public abstract class LogMergePolicy extends MergePolicy { protected long minMergeSize; protected long maxMergeSize; + // Although the core MPs set it explicitly, we must default in case someone + // out there wrote his own LMP ... + protected long maxMergeSizeForOptimize = Long.MAX_VALUE; protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + protected boolean requireContiguousMerge = false; protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; @@ -102,6 +111,21 @@ public abstract class LogMergePolicy extends MergePolicy { writer.get().message("LMP: " + message); } + /** If true, merges must be in-order slice of the + * segments. If false, then the merge policy is free to + * pick any segments. The default is false, which is + * in general more efficient than true since it gives the + * merge policy more freedom to pick closely sized + * segments. */ + public void setRequireContiguousMerge(boolean v) { + requireContiguousMerge = v; + } + + /** See {@link #setRequireContiguousMerge}. */ + public boolean getRequireContiguousMerge() { + return requireContiguousMerge; + } + /**

      Returns the number of segments that are merged at * once and also controls the total number of segments * allowed to accumulate in the index.

      */ @@ -240,9 +264,9 @@ public abstract class LogMergePolicy extends MergePolicy { int start = last - 1; while (start >= 0) { SegmentInfo info = infos.info(start); - if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { + if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) { if (verbose()) { - message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")"); + message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")"); } // need to skip that segment + add a merge for the 'right' segments, // unless there is only 1 which is optimized. @@ -326,9 +350,12 @@ public abstract class LogMergePolicy extends MergePolicy { } /** Returns the merges necessary to optimize the index. - * This merge policy defines "optimized" to mean only one - * segment in the index, where that segment has no - * deletions pending nor separate norms, and it is in + * This merge policy defines "optimized" to mean only the + * requested number of segments is left in the index, and + * respects the {@link #maxMergeSizeForOptimize} setting. + * By default, and assuming {@code maxNumSegments=1}, only + * one segment will be left in the index, where that segment + * has no deletions pending nor separate norms, and it is in * compound file format if the current useCompoundFile * setting is true. This method returns multiple merges * (mergeFactor at a time) so the {@link MergeScheduler} @@ -350,6 +377,8 @@ public abstract class LogMergePolicy extends MergePolicy { } return null; } + + // TODO: handle non-contiguous merge case differently? // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed @@ -382,7 +411,7 @@ public abstract class LogMergePolicy extends MergePolicy { boolean anyTooLarge = false; for (int i = 0; i < last; i++) { SegmentInfo info = infos.info(i); - if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { + if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) { anyTooLarge = true; break; } @@ -448,6 +477,36 @@ public abstract class LogMergePolicy extends MergePolicy { return spec; } + private static class SegmentInfoAndLevel implements Comparable { + SegmentInfo info; + float level; + int index; + + public SegmentInfoAndLevel(SegmentInfo info, float level, int index) { + this.info = info; + this.level = level; + this.index = index; + } + + // Sorts largest to smallest + public int compareTo(SegmentInfoAndLevel other) { + if (level < other.level) + return 1; + else if (level > other.level) + return -1; + else + return 0; + } + } + + private static class SortByIndex implements Comparator { + public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) { + return o1.index - o2.index; + } + } + + private static final SortByIndex sortByIndex = new SortByIndex(); + /** Checks if any merges are now necessary and returns a * {@link MergePolicy.MergeSpecification} if so. 
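Pulling the merge-policy additions above together, a hedged configuration sketch showing the new maxMergeMBForOptimize and requireContiguousMerge knobs next to the existing ones; the numeric values are arbitrary examples and the analyzer variable is assumed from the earlier writer sketch.

    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(10);                // segments merged at once (see javadoc above)
    mp.setMaxMergeMB(2048.0);             // cap for ordinary background merges
    mp.setMaxMergeMBForOptimize(4096.0);  // separate cap honored only during optimize()
    mp.setRequireContiguousMerge(false);  // the default: policy may pick non-contiguous segments
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        .setMergePolicy(mp);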
A merge * is necessary when there are more than {@link @@ -464,17 +523,37 @@ public abstract class LogMergePolicy extends MergePolicy { // Compute levels, which is just log (base mergeFactor) // of the size of each segment - float[] levels = new float[numSegments]; + final List levels = new ArrayList(); final float norm = (float) Math.log(mergeFactor); + final Collection mergingSegments = writer.get().getMergingSegments(); + for(int i=0;i subReaderToSlice = new HashMap(); private boolean[] decrefOnClose; // remember which subreaders to decRef on close private int maxDoc = 0; private int numDocs = -1; @@ -48,7 +48,7 @@ public class MultiReader extends IndexReader implements Cloneable { * @param subReaders set of (sub)readers */ public MultiReader(IndexReader... subReaders) throws IOException { - initialize(subReaders, true); + topLevelContext = initialize(subReaders, true); } /** @@ -60,14 +60,13 @@ public class MultiReader extends IndexReader implements Cloneable { * @param subReaders set of (sub)readers */ public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException { - initialize(subReaders, closeSubReaders); + topLevelContext = initialize(subReaders, closeSubReaders); } - private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException { + private ReaderContext initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException { this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; - for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; maxDoc += subReaders[i].maxDoc(); // compute maxDocs @@ -82,14 +81,10 @@ public class MultiReader extends IndexReader implements Cloneable { if (subReaders[i].hasDeletions()) { hasDeletions = true; } - - final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], - subReaders[i].maxDoc(), - i); - subReaderToSlice.put(subReaders[i], slice); } - starts[subReaders.length] = maxDoc; + readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + return ReaderUtil.buildReaderContext(this); } @Override @@ -97,11 +92,6 @@ public class MultiReader extends IndexReader implements Cloneable { throw new UnsupportedOperationException(""); } - @Override - public int getSubReaderDocBase(IndexReader subReader) { - return subReaderToSlice.get(subReader).start; - } - @Override public Fields fields() throws IOException { throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); @@ -316,12 +306,6 @@ public class MultiReader extends IndexReader implements Cloneable { throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); } - @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { @@ -363,11 +347,6 @@ public class MultiReader extends IndexReader implements Cloneable { subReaders[i].close(); } } - - // NOTE: only needed in case someone had asked for - // FieldCache for top-level reader (which is generally - // not a good idea): - 
FieldCache.DEFAULT.purge(this); } @Override @@ -403,4 +382,25 @@ public class MultiReader extends IndexReader implements Cloneable { public IndexReader[] getSequentialSubReaders() { return subReaders; } + + @Override + public ReaderContext getTopReaderContext() { + return topLevelContext; + } + + @Override + public void addReaderFinishedListener(ReaderFinishedListener listener) { + super.addReaderFinishedListener(listener); + for(IndexReader sub : subReaders) { + sub.addReaderFinishedListener(listener); + } + } + + @Override + public void removeReaderFinishedListener(ReaderFinishedListener listener) { + super.removeReaderFinishedListener(listener); + for(IndexReader sub : subReaders) { + sub.removeReaderFinishedListener(listener); + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/MultiTerms.java b/lucene/src/java/org/apache/lucene/index/MultiTerms.java index 4e265c056e6..2da5db54df8 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiTerms.java +++ b/lucene/src/java/org/apache/lucene/index/MultiTerms.java @@ -76,6 +76,19 @@ public final class MultiTerms extends Terms { } } + @Override + public long getSumTotalTermFreq() throws IOException { + long sum = 0; + for(Terms terms : subs) { + final long v = terms.getSumTotalTermFreq(); + if (v == -1) { + return -1; + } + sum += v; + } + return sum; + } + @Override public Comparator getComparator() { return termComp; diff --git a/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java index 02e21b17ffc..f3283939e04 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -90,13 +90,6 @@ public final class MultiTermsEnum extends TermsEnum { return current; } - @Override - public void cacheCurrentTerm() throws IOException { - for(int i=0;i getComparator() { return termComp; @@ -264,7 +257,7 @@ public final class MultiTermsEnum extends TermsEnum { } @Override - public int docFreq() { + public int docFreq() throws IOException { int sum = 0; for(int i=0;i> byField = new HashMap>(); + if (!fieldInfos.hasNorms()) { + return; + } + // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the @@ -137,7 +139,7 @@ final class NormsWriter extends InvertedDocEndConsumer { // Fill hole for(;upto> readerToFields = new HashMap>(); private List storedFieldReaders = new ArrayList(); private Map normsCache = new HashMap(); - + private final ReaderContext topLevelReaderContext = new AtomicReaderContext(this); private int maxDoc; private int numDocs; private boolean hasDeletions; @@ -76,6 +76,7 @@ public class ParallelReader extends IndexReader { public ParallelReader(boolean closeSubReaders) throws IOException { super(); this.incRefReaders = !closeSubReaders; + readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); } /** {@inheritDoc} */ @@ -92,7 +93,7 @@ public class ParallelReader extends IndexReader { buffer.append(')'); return buffer.toString(); } - + /** Add an IndexReader. 
* @throws IOException if there is a low-level IO error */ @@ -452,29 +453,14 @@ public class ParallelReader extends IndexReader { return bytes; if (!hasNorms(field)) return null; + if (normsCache.containsKey(field)) // cached omitNorms, not missing key + return null; bytes = MultiNorms.norms(reader, field); normsCache.put(field, bytes); return bytes; } - @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - // TODO: maybe optimize - ensureOpen(); - IndexReader reader = fieldToReader.get(field); - if (reader==null) - return; - - byte[] norms = norms(field); - if (norms == null) { - Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, result, offset, maxDoc()); - } - } - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { @@ -560,8 +546,6 @@ public class ParallelReader extends IndexReader { readers.get(i).close(); } } - - FieldCache.DEFAULT.purge(this); } @Override @@ -574,6 +558,26 @@ public class ParallelReader extends IndexReader { } return fieldSet; } + @Override + public ReaderContext getTopReaderContext() { + return topLevelReaderContext; + } + + @Override + public void addReaderFinishedListener(ReaderFinishedListener listener) { + super.addReaderFinishedListener(listener); + for (IndexReader reader : readers) { + reader.addReaderFinishedListener(listener); + } + } + + @Override + public void removeReaderFinishedListener(ReaderFinishedListener listener) { + super.removeReaderFinishedListener(listener); + for (IndexReader reader : readers) { + reader.removeReaderFinishedListener(listener); + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java b/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java index e9fe11adfb8..bf825c1dacd 100644 --- a/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java +++ b/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java @@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef; /** * Provides a {@link DirPayloadProcessor} to be used for a {@link Directory}. - * This allows using differnt {@link DirPayloadProcessor}s for different + * This allows using different {@link DirPayloadProcessor}s for different * directories, for e.g. to perform different processing of payloads of * different directories. *

      diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 2decf76b178..9df1c1acc20 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -224,6 +224,7 @@ final class PerFieldCodecWrapper extends Codec { } } + @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { return new FieldsReader(state.dir, state.fieldInfos, state.segmentInfo, @@ -233,7 +234,7 @@ final class PerFieldCodecWrapper extends Codec { @Override public void files(Directory dir, SegmentInfo info, String codecId, Set files) throws IOException { - // ignore codecid sicne segmentCodec will assign it per codec + // ignore codecid since segmentCodec will assign it per codec segmentCodecs.files(dir, info, files); } diff --git a/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java index fc09266c377..f4869ea926a 100644 --- a/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java +++ b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java @@ -103,7 +103,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy { * @param mode * specifies whether a new index should be created, deleting all * existing snapshots information (immediately), or open an existing - * index, initializing the class with the snapsthots information. + * index, initializing the class with the snapshots information. * @param matchVersion * specifies the {@link Version} that should be used when opening the * IndexWriter. diff --git a/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java b/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java deleted file mode 100644 index 1bb7f028c44..00000000000 --- a/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java +++ /dev/null @@ -1,188 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.lucene.search.Query; -import org.apache.lucene.util.RamUsageEstimator; - -/** Holds buffered deletes, by docID, term or query for a - * single segment. This is used to hold buffered pending - * deletes against the to-be-flushed segment as well as - * per-segment deletes for each segment in the index. 
*/ - -// NOTE: we are sync'd by BufferedDeletes, ie, all access to -// instances of this class is via sync'd methods on -// BufferedDeletes -class SegmentDeletes { - - /* Rough logic: HashMap has an array[Entry] w/ varying - load factor (say 2 * POINTER). Entry is object w/ Term - key, Integer val, int hash, Entry next - (OBJ_HEADER + 3*POINTER + INT). Term is object w/ - String field and String text (OBJ_HEADER + 2*POINTER). - We don't count Term's field since it's interned. - Term's text is String (OBJ_HEADER + 4*INT + POINTER + - OBJ_HEADER + string.length*CHAR). Integer is - OBJ_HEADER + INT. */ - final static int BYTES_PER_DEL_TERM = 8*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT; - - /* Rough logic: del docIDs are List. Say list - allocates ~2X size (2*POINTER). Integer is OBJ_HEADER - + int */ - final static int BYTES_PER_DEL_DOCID = 2*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT; - - /* Rough logic: HashMap has an array[Entry] w/ varying - load factor (say 2 * POINTER). Entry is object w/ - Query key, Integer val, int hash, Entry next - (OBJ_HEADER + 3*POINTER + INT). Query we often - undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */ - final static int BYTES_PER_DEL_QUERY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2*RamUsageEstimator.NUM_BYTES_INT + 24; - - // TODO: many of the deletes stored here will map to - // Integer.MAX_VALUE; we could be more efficient for this - // case ie use a SortedSet not a SortedMap. But: Java's - // SortedSet impls are simply backed by a Map so we won't - // save anything unless we do something custom... - final AtomicInteger numTermDeletes = new AtomicInteger(); - final SortedMap terms = new TreeMap(); - final Map queries = new HashMap(); - final List docIDs = new ArrayList(); - - public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); - - final AtomicLong bytesUsed = new AtomicLong(); - - private final static boolean VERBOSE_DELETES = false; - - @Override - public String toString() { - if (VERBOSE_DELETES) { - return "SegmentDeletes [numTerms=" + numTermDeletes + ", terms=" + terms - + ", queries=" + queries + ", docIDs=" + docIDs + ", bytesUsed=" - + bytesUsed + "]"; - } else { - String s = ""; - if (numTermDeletes.get() != 0) { - s += " " + numTermDeletes.get() + " deleted terms (unique count=" + terms.size() + ")"; - } - if (queries.size() != 0) { - s += " " + queries.size() + " deleted queries"; - } - if (docIDs.size() != 0) { - s += " " + docIDs.size() + " deleted docIDs"; - } - if (bytesUsed.get() != 0) { - s += " bytesUsed=" + bytesUsed.get(); - } - - return s; - } - } - - void update(SegmentDeletes in, boolean noLimit) { - numTermDeletes.addAndGet(in.numTermDeletes.get()); - for (Map.Entry ent : in.terms.entrySet()) { - final Term term = ent.getKey(); - if (!terms.containsKey(term)) { - // only incr bytesUsed if this term wasn't already buffered: - bytesUsed.addAndGet(BYTES_PER_DEL_TERM); - } - final Integer limit; - if (noLimit) { - limit = MAX_INT; - } else { - limit = ent.getValue(); - } - terms.put(term, limit); - } - - for (Map.Entry ent : in.queries.entrySet()) { - final Query query = ent.getKey(); - if (!queries.containsKey(query)) { - // only incr bytesUsed if this query wasn't already buffered: - bytesUsed.addAndGet(BYTES_PER_DEL_QUERY); - } - final Integer limit; - if (noLimit) { - limit = MAX_INT; - } else { - 
limit = ent.getValue(); - } - queries.put(query, limit); - } - - // docIDs never move across segments and the docIDs - // should already be cleared - } - - public void addQuery(Query query, int docIDUpto) { - queries.put(query, docIDUpto); - bytesUsed.addAndGet(BYTES_PER_DEL_QUERY); - } - - public void addDocID(int docID) { - docIDs.add(Integer.valueOf(docID)); - bytesUsed.addAndGet(BYTES_PER_DEL_DOCID); - } - - public void addTerm(Term term, int docIDUpto) { - Integer current = terms.get(term); - if (current != null && docIDUpto < current) { - // Only record the new number if it's greater than the - // current one. This is important because if multiple - // threads are replacing the same doc at nearly the - // same time, it's possible that one thread that got a - // higher docID is scheduled before the other - // threads. If we blindly replace than we can get - // double-doc in the segment. - return; - } - - terms.put(term, Integer.valueOf(docIDUpto)); - numTermDeletes.incrementAndGet(); - if (current == null) { - bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length); - } - } - - void clear() { - terms.clear(); - queries.clear(); - docIDs.clear(); - numTermDeletes.set(0); - bytesUsed.set(0); - } - - void clearDocIDs() { - bytesUsed.addAndGet(-docIDs.size()*BYTES_PER_DEL_DOCID); - docIDs.clear(); - } - - boolean any() { - return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0; - } -} diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 1c414934ac9..31838b1fd96 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Constants; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; @@ -67,10 +68,11 @@ public final class SegmentInfo { private boolean isCompoundFile; - private List files; // cached list of files that this segment uses + private volatile List files; // cached list of files that this segment uses // in the Directory - long sizeInBytes = -1; // total byte size of all of our files (computed on demand) + private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) + private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) private int docStoreOffset; // if this segment shares stored fields & vectors, this // offset is where in that file this segment's docs begin @@ -88,6 +90,17 @@ public final class SegmentInfo { private Map diagnostics; + // Tracks the Lucene version this segment was created with, since 3.1. Null + // indicates an older than 3.0 index, and it's used to detect a too old index. + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // specific versions afterwards ("3.0", "3.1" etc.). + // see Constants.LUCENE_MAIN_VERSION. 
+ private String version; + + // NOTE: only used in-RAM by IW to track buffered deletes; + // this is never written to/read from the Directory + private long bufferedDeletesGen; + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) { this.name = name; @@ -96,10 +109,12 @@ public final class SegmentInfo { delGen = NO; this.isCompoundFile = isCompoundFile; this.docStoreOffset = -1; + this.docStoreSegment = name; this.hasProx = hasProx; this.segmentCodecs = segmentCodecs; this.hasVectors = hasVectors; delCount = 0; + version = Constants.LUCENE_MAIN_VERSION; } /** @@ -107,11 +122,13 @@ public final class SegmentInfo { */ void reset(SegmentInfo src) { clearFiles(); + version = src.version; name = src.name; docCount = src.docCount; dir = src.dir; delGen = src.delGen; docStoreOffset = src.docStoreOffset; + docStoreSegment = src.docStoreSegment; docStoreIsCompoundFile = src.docStoreIsCompoundFile; hasVectors = src.hasVectors; hasProx = src.hasProx; @@ -146,6 +163,9 @@ public final class SegmentInfo { */ public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException { this.dir = dir; + if (format <= DefaultSegmentInfosWriter.FORMAT_3_1) { + version = input.readString(); + } name = input.readString(); docCount = input.readInt(); delGen = input.readLong(); @@ -219,26 +239,41 @@ public final class SegmentInfo { } } } - - /** Returns total size in bytes of all of files used by - * this segment. */ + + /** + * Returns total size in bytes of all of files used by this segment (if + * {@code includeDocStores} is true), or the size of all files except the + * store files otherwise. + */ public long sizeInBytes(boolean includeDocStores) throws IOException { - if (sizeInBytes == -1) { - List files = files(); - final int size = files.size(); - sizeInBytes = 0; - for(int i=0;i + * NOTE: this method is used for internal purposes only - you should + * not modify the version of a SegmentInfo, or it may result in unexpected + * exceptions thrown when you attempt to open the index. + * + * @lucene.internal + */ + public void setVersion(String version) { + this.version = version; + } + + /** Returns the version of the code which wrote the segment. */ + public String getVersion() { + return version; + } + + long getBufferedDeletesGen() { + return bufferedDeletesGen; + } + + void setBufferedDeletesGen(long v) { + bufferedDeletesGen = v; + } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java index 896e6222266..493279ee17b 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java @@ -308,6 +308,19 @@ public final class SegmentInfos extends Vector { } } + /** Prunes any segment whose docs are all deleted. */ + public void pruneDeletedSegments() { + int segIdx = 0; + while(segIdx < size()) { + final SegmentInfo info = info(segIdx); + if (info.getDelCount() == info.docCount) { + remove(segIdx); + } else { + segIdx++; + } + } + } + /** * Returns a copy of this instance, also copying each * SegmentInfo. 
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 5aec216579b..da76904f011 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.atomic.AtomicLong; @@ -59,7 +60,7 @@ final class SegmentMerger { private int mergedDocs; - private final CheckAbort checkAbort; + private final MergeState.CheckAbort checkAbort; /** Maximum number of contiguous documents to bulk-copy when merging stored fields */ @@ -78,9 +79,9 @@ final class SegmentMerger { this.fieldInfos = fieldInfos; segment = name; if (merge != null) { - checkAbort = new CheckAbort(merge, directory); + checkAbort = new MergeState.CheckAbort(merge, directory); } else { - checkAbort = new CheckAbort(null, null) { + checkAbort = new MergeState.CheckAbort(null, null) { @Override public void work(double units) throws MergeAbortedException { // do nothing @@ -266,7 +267,7 @@ final class SegmentMerger { // details. throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption"); - segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, new AtomicLong(0)); + segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null, new AtomicLong(0)); return docCount; } @@ -508,6 +509,7 @@ final class SegmentMerger { mergeState.hasPayloadProcessorProvider = payloadProcessorProvider != null; mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[mergeState.readerCount]; mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[mergeState.readerCount]; + mergeState.checkAbort = checkAbort; docBase = 0; int inputDocBase = 0; @@ -571,13 +573,6 @@ final class SegmentMerger { } private void mergeNorms() throws IOException { - // get needed buffer size by finding the largest segment - int bufferSize = 0; - for (IndexReader reader : readers) { - bufferSize = Math.max(bufferSize, reader.maxDoc()); - } - - byte[] normBuffer = null; IndexOutput output = null; try { for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) { @@ -587,12 +582,15 @@ final class SegmentMerger { output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } - if (normBuffer == null) { - normBuffer = new byte[bufferSize]; - } for (IndexReader reader : readers) { final int maxDoc = reader.maxDoc(); - reader.norms(fi.name, normBuffer, 0); + byte normBuffer[] = reader.norms(fi.name); + if (normBuffer == null) { + // Can be null if this segment doesn't have + // any docs with this field + normBuffer = new byte[maxDoc]; + Arrays.fill(normBuffer, (byte)0); + } if (!reader.hasDeletions()) { //optimized case for segments without deleted docs output.writeBytes(normBuffer, maxDoc); @@ -616,31 +614,4 @@ final class SegmentMerger { } } } - - static class CheckAbort { - private double workCount; - private MergePolicy.OneMerge merge; - 
private Directory dir; - public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { - this.merge = merge; - this.dir = dir; - } - - /** - * Records the fact that roughly units amount of work - * have been done since this method was last called. - * When adding time-consuming code into SegmentMerger, - * you should test different values for units to ensure - * that the time in between calls to merge.checkAborted - * is up to ~ 1 second. - */ - public void work(double units) throws MergePolicy.MergeAbortedException { - workCount += units; - if (workCount >= 10000.0) { - merge.checkAborted(dir); - workCount = 0; - } - } - } - } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index fe4a7bfee4e..f8a0598f72d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -32,7 +31,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.Similarity; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -46,7 +44,6 @@ import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Type; -import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; /** @@ -57,7 +54,7 @@ public class SegmentReader extends IndexReader implements Cloneable { private SegmentInfo si; private int readBufferSize; - + private final ReaderContext readerContext = new AtomicReaderContext(this); CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal(); CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); @@ -190,13 +187,9 @@ public class SegmentReader extends IndexReader implements Cloneable { storeCFSReader.close(); } - // Force FieldCache to evict our entries at this - // point. 
If the exception occurred while - // initializing the core readers, then - // origInstance will be null, and we don't want - // to call FieldCache.purge (it leads to NPE): + // Now, notify any ReaderFinished listeners: if (origInstance != null) { - FieldCache.DEFAULT.purge(origInstance); + origInstance.notifyReaderFinishedListeners(); } } } @@ -233,13 +226,7 @@ public class SegmentReader extends IndexReader implements Cloneable { assert storeDir != null; } - final String storesSegment; - if (si.getDocStoreOffset() != -1) { - storesSegment = si.getDocStoreSegment(); - } else { - storesSegment = segment; - } - + final String storesSegment = si.getDocStoreSegment(); fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount); @@ -342,29 +329,6 @@ public class SegmentReader extends IndexReader implements Cloneable { } } - // Load bytes but do not cache them if they were not - // already cached - public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException { - assert refCount > 0 && (origNorm == null || origNorm.refCount > 0); - if (bytes != null) { - // Already cached -- copy from cache: - assert len <= maxDoc(); - System.arraycopy(bytes, 0, bytesOut, offset, len); - } else { - // Not cached - if (origNorm != null) { - // Ask origNorm to load - origNorm.bytes(bytesOut, offset, len); - } else { - // We are orig -- read ourselves from disk: - synchronized(in) { - in.seek(normSeek); - in.readBytes(bytesOut, offset, len, false); - } - } - } - } - // Load & cache full bytes array. Returns bytes. public synchronized byte[] bytes() throws IOException { assert refCount > 0 && (origNorm == null || origNorm.refCount > 0); @@ -669,6 +633,7 @@ public class SegmentReader extends IndexReader implements Cloneable { clone.si = si; clone.readBufferSize = readBufferSize; clone.pendingDeleteCount = pendingDeleteCount; + clone.readerFinishedListeners = readerFinishedListeners; if (!openReadOnly && hasChanges) { // My pending changes transfer to the new reader @@ -999,22 +964,6 @@ public class SegmentReader extends IndexReader implements Cloneable { norm.copyOnWrite()[doc] = value; // set the value } - /** Read norms into a pre-allocated array. */ - @Override - public synchronized void norms(String field, byte[] bytes, int offset) - throws IOException { - - ensureOpen(); - Norm norm = norms.get(field); - if (norm == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - return; - } - - norm.bytes(bytes, offset, maxDoc()); - } - - private void openNorms(Directory cfsDir, int readBufferSize) throws IOException { long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now) int maxDoc = maxDoc(); @@ -1191,6 +1140,11 @@ public class SegmentReader extends IndexReader implements Cloneable { buffer.append(si.toString(core.dir, pendingDeleteCount)); return buffer.toString(); } + + @Override + public ReaderContext getTopReaderContext() { + return readerContext; + } /** * Return the name of the segment this reader is reading. @@ -1254,6 +1208,16 @@ public class SegmentReader extends IndexReader implements Cloneable { return core.termsIndexDivisor; } + @Override + protected void readerFinished() { + // Do nothing here -- we have more careful control on + // when to notify that a SegmentReader has finished, + // because a given core is shared across many cloned + // SegmentReaders. 
We only notify once that core is no + // longer used (all SegmentReaders sharing it have been + // closed). + } + @Override public DocValues docValues(String field) throws IOException { return core.fields.docValues(field); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 3ef036f4aba..98cfdb4edf3 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -23,6 +23,7 @@ import java.util.HashSet; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitVector; /** * @lucene.experimental @@ -37,6 +38,16 @@ public class SegmentWriteState { public final Collection flushedFiles; public final AtomicLong bytesUsed; + // Deletes to apply while we are flushing the segment. A + // Term is enrolled in here if it was deleted at one + // point, and it's mapped to the docIDUpto, meaning any + // docID < docIDUpto containing this term should be + // deleted. + public final BufferedDeletes segDeletes; + + // Lazily created: + public BitVector deletedDocs; + final SegmentCodecs segmentCodecs; public final String codecId; @@ -62,8 +73,9 @@ public class SegmentWriteState { public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, - int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, AtomicLong bytesUsed) { + int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes, AtomicLong bytesUsed) { this.infoStream = infoStream; + this.segDeletes = segDeletes; this.directory = directory; this.segmentName = segmentName; this.fieldInfos = fieldInfos; @@ -88,6 +100,7 @@ public class SegmentWriteState { segmentCodecs = state.segmentCodecs; flushedFiles = state.flushedFiles; this.codecId = codecId; + segDeletes = state.segDeletes; bytesUsed = state.bytesUsed; } } diff --git a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java index 7a29870586f..78c834f8008 100644 --- a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java @@ -18,13 +18,9 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; -import java.util.List; -import java.util.ArrayList; import java.util.Map; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; // javadoc @@ -55,10 +51,12 @@ import org.apache.lucene.index.MultiReader; // javadoc public final class SlowMultiReaderWrapper extends FilterIndexReader { + private final ReaderContext readerContext; private final Map normsCache = new HashMap(); public SlowMultiReaderWrapper(IndexReader other) { super(other); + readerContext = new AtomicReaderContext(this); // emulate atomic reader! 
} @Override @@ -85,22 +83,17 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader { return bytes; if (!hasNorms(field)) return null; - + if (normsCache.containsKey(field)) // cached omitNorms, not missing key + return null; + bytes = MultiNorms.norms(in, field); normsCache.put(field, bytes); return bytes; } - + @Override - public synchronized void norms(String field, byte[] bytes, int offset) throws IOException { - // TODO: maybe optimize - ensureOpen(); - byte[] norms = norms(field); - if (norms == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, bytes, offset, maxDoc()); - } + public ReaderContext getTopReaderContext() { + return readerContext; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/TermState.java b/lucene/src/java/org/apache/lucene/index/TermState.java similarity index 53% rename from lucene/src/java/org/apache/lucene/index/codecs/TermState.java rename to lucene/src/java/org/apache/lucene/index/TermState.java index df437f54dd8..3279366b589 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/TermState.java +++ b/lucene/src/java/org/apache/lucene/index/TermState.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs; +package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,27 +17,23 @@ package org.apache.lucene.index.codecs; * limitations under the License. */ -import org.apache.lucene.index.DocsEnum; // for javadocs - -import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs - /** - * Holds all state required for {@link StandardPostingsReader} - * to produce a {@link DocsEnum} without re-seeking the - * terms dict. + * Encapsulates all required internal state to position the associated + * {@link TermsEnum} without re-seeking. 
+ * + * @see TermsEnum#seek(org.apache.lucene.util.BytesRef, TermState) + * @see TermsEnum#termState() * @lucene.experimental */ +public abstract class TermState implements Cloneable { -public class TermState implements Cloneable { - public long ord; // ord for this term - public long filePointer; // fp into the terms dict primary file (_X.tis) - public int docFreq; // how many docs have this term - - public void copy(TermState other) { - ord = other.ord; - filePointer = other.filePointer; - docFreq = other.docFreq; - } + /** + * Copies the content of the given {@link TermState} to this instance + * + * @param other + * the TermState to copy + */ + public abstract void copyFrom(TermState other); @Override public Object clone() { @@ -47,10 +43,5 @@ public class TermState implements Cloneable { // should not happen throw new RuntimeException(cnse); } - } - - @Override - public String toString() { - return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord; - } -} + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java index 4938538d054..2b4e35e09cd 100644 --- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java @@ -281,6 +281,7 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { int[] lastOffsets; // Last offset we saw int[] lastPositions; // Last position where this term occurred + @Override ParallelPostingsArray newInstance(int size) { return new TermVectorsPostingsArray(size); } diff --git a/lucene/src/java/org/apache/lucene/index/Terms.java b/lucene/src/java/org/apache/lucene/index/Terms.java index 362476754f6..e68293097cf 100644 --- a/lucene/src/java/org/apache/lucene/index/Terms.java +++ b/lucene/src/java/org/apache/lucene/index/Terms.java @@ -57,6 +57,18 @@ public abstract class Terms { } } + /** Returns the number of documents containing the + * specified term text. Returns 0 if the term does not + * exist. */ + public long totalTermFreq(BytesRef text) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.totalTermFreq(); + } else { + return 0; + } + } + /** Get {@link DocsEnum} for the specified term. This * method may return null if the term does not exist. */ public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException { @@ -80,11 +92,59 @@ public abstract class Terms { } } + /** + * Expert: Get {@link DocsEnum} for the specified {@link TermState}. + * This method may return null if the term does not exist. + * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + termsEnum.seek(term, termState); + return termsEnum.docs(skipDocs, reuse); + } + + /** + * Get {@link DocsEnum} for the specified {@link TermState}. This + * method will may return null if the term does not exists, or positions were + * not indexed. 
+ * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + termsEnum.seek(term, termState); + return termsEnum.docsAndPositions(skipDocs, reuse); + } + public long getUniqueTermCount() throws IOException { throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()"); } - protected TermsEnum getThreadTermsEnum() throws IOException { + /** Returns the sum of {@link TermsEnum#totalTermFreq} for + * all terms in this field, or -1 if this measure isn't + * stored by the codec (or if this fields omits term freq + * and positions). Note that, just like other term + * measures, this measure does not take deleted documents + * into account. */ + public abstract long getSumTotalTermFreq() throws IOException; + + /** + * Returns a thread-private {@link TermsEnum} instance. Obtaining + * {@link TermsEnum} from this method might be more efficient than using + * {@link #iterator()} directly since this method doesn't necessarily create a + * new {@link TermsEnum} instance. + *

      + * NOTE: {@link TermsEnum} instances obtained from this method must not be + * shared across threads. The enum should only be used within a local context + * where other threads can't access it. + * + * @return a thread-private {@link TermsEnum} instance + * @throws IOException + * if an IOException occurs + * @lucene.internal + */ + public TermsEnum getThreadTermsEnum() throws IOException { TermsEnum termsEnum = threadEnums.get(); if (termsEnum == null) { termsEnum = iterator(); diff --git a/lucene/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/src/java/org/apache/lucene/index/TermsEnum.java index 9901a966d2b..2d6b6c1133e 100644 --- a/lucene/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/TermsEnum.java @@ -73,7 +73,34 @@ public abstract class TermsEnum { * may be before or after the current ord. See {@link * #seek(BytesRef)}. */ public abstract SeekStatus seek(long ord) throws IOException; - + + /** + * Expert: Seeks a specific position by {@link TermState} previously obtained + * from {@link #termState()}. Callers should maintain the {@link TermState} to + * use this method. Low-level implementations may position the TermsEnum + * without re-seeking the term dictionary. + *

      + * Seeking by {@link TermState} should only be used iff the enum the state was + * obtained from and the enum the state is used for seeking are obtained from + * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can + * leave the enum in undefined state. + *

      + * NOTE: Using this method with an incompatible {@link TermState} might leave + * this {@link TermsEnum} in undefined state. On a segment level + * {@link TermState} instances are compatible only iff the source and the + * target {@link TermsEnum} operate on the same field. If operating on segment + * level, TermState instances must not be used across segments. + *

      + * NOTE: A seek by {@link TermState} might not restore the + * {@link AttributeSource}'s state. {@link AttributeSource} states must be + * maintained separately if this method is used. + * @param term the term the TermState corresponds to + * @param state the {@link TermState} + * */ + public void seek(BytesRef term, TermState state) throws IOException { + seek(term); + } + /** Increments the enumeration to the next element. * Returns the resulting term, or null if the end was * hit. The returned BytesRef may be re-used across calls @@ -97,7 +124,15 @@ public abstract class TermsEnum { * term. Do not call this before calling next() for the * first time, after next() returns null or seek returns * {@link SeekStatus#END}.*/ - public abstract int docFreq(); + public abstract int docFreq() throws IOException; + + /** Returns the total number of occurrences of this term + * across all documents (the sum of the freq() for each + * doc that has this term). This will be -1 if the + * codec doesn't support this measure. Note that, like + * other term measures, this measure does not take + * deleted documents into account. */ + public abstract long totalTermFreq() throws IOException; /** Get {@link DocsEnum} for the current term. Do not * call this before calling {@link #next} or {@link @@ -116,6 +151,25 @@ public abstract class TermsEnum { * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + /** + * Expert: Returns the TermsEnums internal state to position the TermsEnum + * without re-seeking the term dictionary. + *

      + * NOTE: A seek by {@link TermState} might not capture the + * {@link AttributeSource}'s state. Callers must maintain the + * {@link AttributeSource} states separately + * + * @see TermState + * @see #seek(BytesRef, TermState) + */ + public TermState termState() throws IOException { + return new TermState() { + @Override + public void copyFrom(TermState other) { + } + }; + } + /** Return the {@link BytesRef} Comparator used to sort * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this @@ -123,10 +177,6 @@ public abstract class TermsEnum { * instance & reuse it. */ public abstract Comparator getComparator() throws IOException; - /** Optional optimization hint: informs the codec that the - * current term is likely to be re-seek'd-to soon. */ - public abstract void cacheCurrentTerm() throws IOException; - /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} *

      Please note: This enum should be unmodifiable, @@ -141,9 +191,6 @@ public abstract class TermsEnum { @Override public SeekStatus seek(long ord) { return SeekStatus.END; } - @Override - public void cacheCurrentTerm() {} - @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); @@ -158,6 +205,11 @@ public abstract class TermsEnum { public int docFreq() { throw new IllegalStateException("this method should never be called"); } + + @Override + public long totalTermFreq() { + throw new IllegalStateException("this method should never be called"); + } @Override public long ord() { @@ -183,5 +235,15 @@ public abstract class TermsEnum { public synchronized AttributeSource attributes() { return super.attributes(); } + + @Override + public TermState termState() throws IOException { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public void seek(BytesRef term, TermState state) throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java new file mode 100644 index 00000000000..40bf8e95e11 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java @@ -0,0 +1,56 @@ +package org.apache.lucene.index.codecs; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocsEnum; // javadocs +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; + +/** + * Holds all state required for {@link PostingsReaderBase} + * to produce a {@link DocsEnum} without re-seeking the + * terms dict. 
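The seek(BytesRef, TermState) / termState() pair introduced above is easiest to read as a round trip: position an enum once, snapshot its state, and later reposition an enum over the same field of the same reader without paying for a second dictionary lookup. A rough sketch under exactly those assumptions (the class and variable names are illustrative, not part of the patch):

  import java.io.IOException;
  import org.apache.lucene.index.DocsEnum;
  import org.apache.lucene.index.TermState;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.util.BytesRef;

  final class TermStateRoundTrip {
    /** 'terms' must be the Terms of a single field of a single reader. */
    static DocsEnum reposition(Terms terms, BytesRef text) throws IOException {
      TermsEnum first = terms.iterator();
      if (first.seek(text) != TermsEnum.SeekStatus.FOUND) {
        return null;
      }
      TermState snapshot = first.termState();  // cacheable, cloneable snapshot
      TermsEnum second = terms.iterator();     // same field, same reader
      second.seek(text, snapshot);             // repositions without re-seeking the terms dict
      return second.docs(null, null);
    }
  }

The same positioned enum is also where the new per-term statistics come from: docFreq() counts documents containing the term, while totalTermFreq() counts all of its occurrences and may return -1 when the codec does not record that measure.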
+ */ +public class BlockTermState extends OrdTermState { + public int docFreq; // how many docs have this term + public long totalTermFreq; // total number of occurrences of this term + + public int termCount; // term ord are in the current block + public long blockFilePointer; // fp into the terms dict primary file (_X.tib) that holds this term + + public int blockTermCount; // how many terms in current block + + @Override + public void copyFrom(TermState _other) { + assert _other instanceof BlockTermState : "can not copy from " + _other.getClass().getName(); + BlockTermState other = (BlockTermState) _other; + super.copyFrom(_other); + docFreq = other.docFreq; + totalTermFreq = other.totalTermFreq; + termCount = other.termCount; + blockFilePointer = other.blockFilePointer; + + // NOTE: don't copy blockTermCount; + // it's "transient": used only by the "primary" + // termState, and regenerated on seek by TermState + } + + @Override + public String toString() { + return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java new file mode 100644 index 00000000000..93882869c26 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -0,0 +1,748 @@ +package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.TreeMap; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.DoubleBarrelLRUCache; + +/** Handles a terms dict, but decouples all details of + * doc/freqs/positions reading to an instance of {@link + * PostingsReaderBase}. This class is reusable for + * codecs that use a different format for + * docs/freqs/positions (though codecs are also free to + * make their own terms dict impl). + * + *

      This class also interacts with an instance of {@link + * TermsIndexReaderBase}, to abstract away the specific + * implementation of the terms dict index. + * @lucene.experimental */ + +public class BlockTermsReader extends FieldsProducer { + // Open input to the main terms dict file (_X.tis) + private final IndexInput in; + + // Reads the terms dict entries, to gather state to + // produce DocsEnum on demand + private final PostingsReaderBase postingsReader; + + private final TreeMap fields = new TreeMap(); + + // Comparator that orders our terms + private final Comparator termComp; + + // Caches the most recently looked-up field + terms: + private final DoubleBarrelLRUCache termsCache; + + // Reads the terms index + private TermsIndexReaderBase indexReader; + + // keeps the dirStart offset + protected long dirOffset; + + // Used as key for the terms cache + private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey { + String field; + BytesRef term; + + public FieldAndTerm() { + } + + public FieldAndTerm(FieldAndTerm other) { + field = other.field; + term = new BytesRef(other.term); + } + + @Override + public boolean equals(Object _other) { + FieldAndTerm other = (FieldAndTerm) _other; + return other.field == field && term.bytesEquals(other.term); + } + + @Override + public Object clone() { + return new FieldAndTerm(this); + } + + @Override + public int hashCode() { + return field.hashCode() * 31 + term.hashCode(); + } + } + + //private String segment; + + public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize, + Comparator termComp, int termsCacheSize, String codecId) + throws IOException { + + this.postingsReader = postingsReader; + termsCache = new DoubleBarrelLRUCache(termsCacheSize); + + this.termComp = termComp; + //this.segment = segment; + in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION), + readBufferSize); + + boolean success = false; + try { + readHeader(in); + + // Have PostingsReader init itself + postingsReader.init(in); + + // Read per-field details + seekDir(in, dirOffset); + + final int numFields = in.readVInt(); + + for(int i=0;i= 0; + final long termsStartPointer = in.readVLong(); + final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? 
-1 : in.readVLong(); + assert !fields.containsKey(fieldInfo.name); + fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq)); + } + success = true; + } finally { + if (!success) { + in.close(); + } + } + + this.indexReader = indexReader; + } + + protected void readHeader(IndexInput input) throws IOException { + CodecUtil.checkHeader(in, BlockTermsWriter.CODEC_NAME, + BlockTermsWriter.VERSION_START, + BlockTermsWriter.VERSION_CURRENT); + dirOffset = in.readLong(); + } + + protected void seekDir(IndexInput input, long dirOffset) + throws IOException { + input.seek(dirOffset); + } + + @Override + public void loadTermsIndex(int indexDivisor) throws IOException { + indexReader.loadTermsIndex(indexDivisor); + } + + @Override + public void close() throws IOException { + try { + try { + if (indexReader != null) { + indexReader.close(); + } + } finally { + // null so if an app hangs on to us (ie, we are not + // GCable, despite being closed) we still free most + // ram + indexReader = null; + if (in != null) { + in.close(); + } + } + } finally { + try { + if (postingsReader != null) { + postingsReader.close(); + } + } finally { + for(FieldReader field : fields.values()) { + field.close(); + } + } + } + } + + public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection files) { + files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, BlockTermsWriter.TERMS_EXTENSION)); + } + + public static void getExtensions(Collection extensions) { + extensions.add(BlockTermsWriter.TERMS_EXTENSION); + } + + @Override + public FieldsEnum iterator() { + return new TermFieldsEnum(); + } + + @Override + public Terms terms(String field) throws IOException { + return fields.get(field); + } + + // Iterates through all fields + private class TermFieldsEnum extends FieldsEnum { + final Iterator it; + FieldReader current; + + TermFieldsEnum() { + it = fields.values().iterator(); + } + + @Override + public String next() { + if (it.hasNext()) { + current = it.next(); + return current.fieldInfo.name; + } else { + current = null; + return null; + } + } + + @Override + public TermsEnum terms() throws IOException { + return current.iterator(); + } + + @Override + public DocValues docValues() throws IOException { + return null; + } + } + + private class FieldReader extends Terms implements Closeable { + final long numTerms; + final FieldInfo fieldInfo; + final long termsStartPointer; + final long sumTotalTermFreq; + + FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + this.numTerms = numTerms; + this.termsStartPointer = termsStartPointer; + this.sumTotalTermFreq = sumTotalTermFreq; + } + + @Override + public Comparator getComparator() { + return termComp; + } + + @Override + public void close() { + super.close(); + } + + @Override + public TermsEnum iterator() throws IOException { + return new SegmentTermsEnum(); + } + + @Override + public long getUniqueTermCount() { + return numTerms; + } + + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + + // Iterates through terms in this field + private final class SegmentTermsEnum extends TermsEnum { + private final IndexInput in; + private final BlockTermState state; + private final boolean doOrd; + private final FieldAndTerm fieldTerm = new FieldAndTerm(); + private final TermsIndexReaderBase.FieldIndexEnum indexEnum; + private final BytesRef term = new BytesRef(); + + /* This is 
true if indexEnum is "still" seek'd to the index term + for the current term. We set it to true on seeking, and then it + remains valid until next() is called enough times to load another + terms block: */ + private boolean indexIsCurrent; + + /* True if we've already called .next() on the indexEnum, to "bracket" + the current block of terms: */ + private boolean didIndexNext; + + /* Next index term, bracketing the current block of terms; this is + only valid if didIndexNext is true: */ + private BytesRef nextIndexTerm; + + /* True after seek(TermState), do defer seeking. If the app then + calls next() (which is not "typical"), then we'll do the real seek */ + private boolean seekPending; + + /* How many blocks we've read since last seek. Once this + is >= indexEnum.getDivisor() we set indexIsCurrent to false (since + the index can no long bracket seek-within-block). */ + private int blocksSinceSeek; + + private byte[] termSuffixes; + private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput(null); + + /* Common prefix used for all terms in this block. */ + private int termBlockPrefix; + + private byte[] docFreqBytes; + private final ByteArrayDataInput freqReader = new ByteArrayDataInput(null); + private int metaDataUpto; + + public SegmentTermsEnum() throws IOException { + in = (IndexInput) BlockTermsReader.this.in.clone(); + in.seek(termsStartPointer); + indexEnum = indexReader.getFieldEnum(fieldInfo); + doOrd = indexReader.supportsOrd(); + fieldTerm.field = fieldInfo.name; + state = postingsReader.newTermState(); + state.totalTermFreq = -1; + state.ord = -1; + + termSuffixes = new byte[128]; + docFreqBytes = new byte[64]; + //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader); + } + + @Override + public Comparator getComparator() { + return termComp; + } + + @Override + public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException { + + if (indexEnum == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this); + /* + if (didIndexNext) { + if (nextIndexTerm == null) { + //System.out.println(" nextIndexTerm=null"); + } else { + //System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString()); + } + } + */ + + // Check cache + if (useCache) { + fieldTerm.term = target; + // TODO: should we differentiate "frozen" + // TermState (ie one that was cloned and + // cached/returned by termState()) from the + // malleable (primary) one? 
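        // A hit in termsCache below means an earlier seek to this exact
        // field+term already decoded its metadata and cached a frozen clone of
        // the state.  seek(target, cachedState) then only copies that state and
        // sets seekPending: docFreq and postings can be pulled without touching
        // the terms file again, and only a later next() forces the block to be
        // re-read so the enum can catch up.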
+ final TermState cachedState = termsCache.get(fieldTerm); + if (cachedState != null) { + seekPending = true; + //System.out.println(" cached!"); + seek(target, cachedState); + //System.out.println(" term=" + term.utf8ToString()); + return SeekStatus.FOUND; + } + } + + boolean doSeek = true; + + // See if we can avoid seeking, because target term + // is after current term but before next index term: + if (indexIsCurrent) { + + final int cmp = termComp.compare(term, target); + + if (cmp == 0) { + // Already at the requested term + return SeekStatus.FOUND; + } else if (cmp < 0) { + + // Target term is after current term + if (!didIndexNext) { + if (indexEnum.next() == -1) { + nextIndexTerm = null; + } else { + nextIndexTerm = indexEnum.term(); + } + //System.out.println(" now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString())); + didIndexNext = true; + } + + if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) { + // Optimization: requested term is within the + // same term block we are now in; skip seeking + // (but do scanning): + doSeek = false; + //System.out.println(" skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString())); + } + } + } + + if (doSeek) { + //System.out.println(" seek"); + + // Ask terms index to find biggest indexed term (= + // first term in a block) that's <= our text: + in.seek(indexEnum.seek(target)); + boolean result = nextBlock(); + + // Block must exist since, at least, the indexed term + // is in the block: + assert result; + + indexIsCurrent = true; + didIndexNext = false; + blocksSinceSeek = 0; + + if (doOrd) { + state.ord = indexEnum.ord()-1; + } + + // NOTE: the first _next() after an index seek is + // a bit wasteful, since it redundantly reads some + // suffix bytes into the buffer. We could avoid storing + // those bytes in the primary file, but then when + // next()ing over an index term we'd have to + // special case it: + term.copy(indexEnum.term()); + //System.out.println(" seek: term=" + term.utf8ToString()); + } else { + ////System.out.println(" skip seek"); + } + + seekPending = false; + + // Now scan: + while (_next() != null) { + final int cmp = termComp.compare(term, target); + if (cmp == 0) { + // Match! + if (useCache) { + // Store in cache + decodeMetaData(); + termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone()); + } + //System.out.println(" FOUND"); + return SeekStatus.FOUND; + } else if (cmp > 0) { + //System.out.println(" NOT_FOUND term=" + term.utf8ToString()); + return SeekStatus.NOT_FOUND; + } + + // The purpose of the terms dict index is to seek + // the enum to the closest index term before the + // term we are looking for. So, we should never + // cross another index term (besides the first + // one) while we are scanning: + assert indexIsCurrent; + } + + indexIsCurrent = false; + //System.out.println(" END"); + return SeekStatus.END; + } + + @Override + public BytesRef next() throws IOException { + //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termCount); + + // If seek was previously called and the term was cached, + // usually caller is just going to pull a D/&PEnum or get + // docFreq, etc. 
But, if they then call next(), + // this method catches up all internal state so next() + // works properly: + if (seekPending) { + assert !indexIsCurrent; + in.seek(state.blockFilePointer); + final int pendingSeekCount = state.termCount; + boolean result = nextBlock(); + + final long savOrd = state.ord; + + // Block must exist since seek(TermState) was called w/ a + // TermState previously returned by this enum when positioned + // on a real term: + assert result; + + while(state.termCount < pendingSeekCount) { + BytesRef nextResult = _next(); + assert nextResult != null; + } + seekPending = false; + state.ord = savOrd; + } + return _next(); + } + + /* Decodes only the term bytes of the next term. If caller then asks for + metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily) + decode all metadata up to the current term. */ + private BytesRef _next() throws IOException { + //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")"); + if (state.termCount == state.blockTermCount) { + if (!nextBlock()) { + //System.out.println(" eof"); + indexIsCurrent = false; + return null; + } + } + + // TODO: cutover to something better for these ints! simple64? + final int suffix = termSuffixesReader.readVInt(); + //System.out.println(" suffix=" + suffix); + + term.length = termBlockPrefix + suffix; + if (term.bytes.length < term.length) { + term.grow(term.length); + } + termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix); + state.termCount++; + + // NOTE: meaningless in the non-ord case + state.ord++; + + //System.out.println(" return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term); + return term; + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public int docFreq() throws IOException { + //System.out.println("BTR.docFreq"); + decodeMetaData(); + //System.out.println(" return " + state.docFreq); + return state.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + decodeMetaData(); + return state.totalTermFreq; + } + + @Override + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { + //System.out.println("BTR.docs this=" + this); + decodeMetaData(); + //System.out.println(" state.docFreq=" + state.docFreq); + final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + assert docsEnum != null; + return docsEnum; + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + //System.out.println("BTR.d&p this=" + this); + decodeMetaData(); + if (fieldInfo.omitTermFreqAndPositions) { + return null; + } else { + DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); + //System.out.println(" return d&pe=" + dpe); + return dpe; + } + } + + @Override + public void seek(BytesRef target, TermState otherState) throws IOException { + //System.out.println("BTR.seek termState target=" + target.utf8ToString() + " " + target + " this=" + this); + assert otherState != null && otherState instanceof BlockTermState; + assert !doOrd || ((BlockTermState) otherState).ord < numTerms; + state.copyFrom(otherState); + seekPending = true; + indexIsCurrent = false; + term.copy(target); + } + + @Override + public TermState termState() throws IOException { + //System.out.println("BTR.termState this=" + this); + decodeMetaData(); + TermState ts = (TermState) state.clone(); + //System.out.println(" 
return ts=" + ts); + return ts; + } + + @Override + public SeekStatus seek(long ord) throws IOException { + //System.out.println("BTR.seek by ord ord=" + ord); + if (indexEnum == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + if (ord >= numTerms) { + state.ord = numTerms-1; + return SeekStatus.END; + } + + // TODO: if ord is in same terms block and + // after current ord, we should avoid this seek just + // like we do in the seek(BytesRef) case + in.seek(indexEnum.seek(ord)); + boolean result = nextBlock(); + + // Block must exist since ord < numTerms: + assert result; + + indexIsCurrent = true; + didIndexNext = false; + blocksSinceSeek = 0; + seekPending = false; + + state.ord = indexEnum.ord()-1; + assert state.ord >= -1: "ord=" + state.ord; + term.copy(indexEnum.term()); + + // Now, scan: + int left = (int) (ord - state.ord); + while(left > 0) { + final BytesRef term = _next(); + assert term != null; + left--; + assert indexIsCurrent; + } + + // always found + return SeekStatus.FOUND; + } + + @Override + public long ord() { + if (!doOrd) { + throw new UnsupportedOperationException(); + } + return state.ord; + } + + private void doPendingSeek() { + } + + /* Does initial decode of next block of terms; this + doesn't actually decode the docFreq, totalTermFreq, + postings details (frq/prx offset, etc.) metadata; + it just loads them as byte[] blobs which are then + decoded on-demand if the metadata is ever requested + for any term in this block. This enables terms-only + intensive consumes (eg certain MTQs, respelling) to + not pay the price of decoding metadata they won't + use. */ + private boolean nextBlock() throws IOException { + + // TODO: we still lazy-decode the byte[] for each + // term (the suffix), but, if we decoded + // all N terms up front then seeking could do a fast + // bsearch w/in the block... 
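      // On-disk layout of one block, as read below (and as written by
      // BlockTermsWriter.flushBlock):
      //   VInt  term count for the block (0 is the end-of-field marker)
      //   VInt  length of the prefix shared by all terms in the block
      //   VInt  byte length of the packed term suffixes, then those bytes
      //         (per term: VInt suffix length + suffix bytes)
      //   VInt  byte length of the packed stats, then those bytes
      //         (per term: VInt docFreq, plus VLong totalTermFreq-docFreq
      //          unless the field omits term freqs/positions)
      //   followed by whatever per-block metadata the PostingsReaderBase
      //   consumes in readTermsBlock.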
+ + //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this); + state.blockFilePointer = in.getFilePointer(); + state.blockTermCount = in.readVInt(); + //System.out.println(" blockTermCount=" + state.blockTermCount); + if (state.blockTermCount == 0) { + return false; + } + termBlockPrefix = in.readVInt(); + + // term suffixes: + int len = in.readVInt(); + if (termSuffixes.length < len) { + termSuffixes = new byte[ArrayUtil.oversize(len, 1)]; + } + //System.out.println(" termSuffixes len=" + len); + in.readBytes(termSuffixes, 0, len); + termSuffixesReader.reset(termSuffixes); + + // docFreq, totalTermFreq + len = in.readVInt(); + if (docFreqBytes.length < len) { + docFreqBytes = new byte[ArrayUtil.oversize(len, 1)]; + } + //System.out.println(" freq bytes len=" + len); + in.readBytes(docFreqBytes, 0, len); + freqReader.reset(docFreqBytes); + metaDataUpto = 0; + + state.termCount = 0; + + postingsReader.readTermsBlock(in, fieldInfo, state); + + blocksSinceSeek++; + indexIsCurrent &= (blocksSinceSeek < indexReader.getDivisor()); + //System.out.println(" indexIsCurrent=" + indexIsCurrent); + + return true; + } + + private void decodeMetaData() throws IOException { + //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state); + if (!seekPending) { + // lazily catch up on metadata decode: + final int limit = state.termCount; + state.termCount = metaDataUpto; + while (metaDataUpto < limit) { + //System.out.println(" decode"); + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + state.docFreq = freqReader.readVInt(); + if (!fieldInfo.omitTermFreqAndPositions) { + state.totalTermFreq = state.docFreq + freqReader.readVLong(); + } + postingsReader.nextTerm(fieldInfo, state); + metaDataUpto++; + state.termCount++; + } + } else { + //System.out.println(" skip! seekPending"); + } + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java new file mode 100644 index 00000000000..c60b42506ed --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java @@ -0,0 +1,316 @@ +package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.RamUsageEstimator; + +// TODO: currently we encode all terms between two indexed +// terms as a block; but, we could decouple the two, ie +// allow several blocks in between two indexed terms + +/** + * Writes terms dict, block-encoding (column stride) each + * term's metadata for each set of terms between two + * index terms. + * + * @lucene.experimental + */ + +public class BlockTermsWriter extends FieldsConsumer { + + final static String CODEC_NAME = "BLOCK_TERMS_DICT"; + + // Initial format + public static final int VERSION_START = 0; + + public static final int VERSION_CURRENT = VERSION_START; + + /** Extension of terms file */ + static final String TERMS_EXTENSION = "tib"; + + protected final IndexOutput out; + final PostingsWriterBase postingsWriter; + final FieldInfos fieldInfos; + FieldInfo currentField; + private final TermsIndexWriterBase termsIndexWriter; + private final List fields = new ArrayList(); + private final Comparator termComp; + private final String segment; + + public BlockTermsWriter( + TermsIndexWriterBase termsIndexWriter, + SegmentWriteState state, + PostingsWriterBase postingsWriter, + Comparator termComp) throws IOException + { + final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION); + this.termsIndexWriter = termsIndexWriter; + this.termComp = termComp; + out = state.directory.createOutput(termsFileName); + fieldInfos = state.fieldInfos; + writeHeader(out); + currentField = null; + this.postingsWriter = postingsWriter; + segment = state.segmentName; + + //System.out.println("BTW.init seg=" + state.segmentName); + + postingsWriter.start(out); // have consumer write its format/header + } + + protected void writeHeader(IndexOutput out) throws IOException { + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + + out.writeLong(0); // leave space for end index pointer + } + + @Override + public TermsConsumer addField(FieldInfo field) throws IOException { + //System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name); + assert currentField == null || currentField.name.compareTo(field.name) < 0; + currentField = field; + TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer()); + final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); + fields.add(terms); + return terms; + } + + @Override + public void close() throws IOException { + + try { + + int nonZeroCount = 0; + for(TermsWriter field : fields) { + if (field.numTerms > 0) { + nonZeroCount++; + } + } + + final long dirStart = out.getFilePointer(); + + out.writeVInt(nonZeroCount); + for(TermsWriter field : fields) { + if (field.numTerms > 0) { + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVLong(field.termsStartPointer); + if (!field.fieldInfo.omitTermFreqAndPositions) { + out.writeVLong(field.sumTotalTermFreq); + } + } + } + writeTrailer(dirStart); + } finally { + try { + 
out.close(); + } finally { + try { + postingsWriter.close(); + } finally { + termsIndexWriter.close(); + } + } + } + } + + protected void writeTrailer(long dirStart) throws IOException { + // TODO Auto-generated method stub + out.seek(CodecUtil.headerLength(CODEC_NAME)); + out.writeLong(dirStart); + } + + private static class TermEntry { + public final BytesRef term = new BytesRef(); + public TermStats stats; + } + + class TermsWriter extends TermsConsumer { + private final FieldInfo fieldInfo; + private final PostingsWriterBase postingsWriter; + private final long termsStartPointer; + private long numTerms; + private final TermsIndexWriterBase.FieldWriter fieldIndexWriter; + long sumTotalTermFreq; + private final BytesRef lastTerm = new BytesRef(); + + private TermEntry[] pendingTerms; + + private int pendingCount; + + TermsWriter( + TermsIndexWriterBase.FieldWriter fieldIndexWriter, + FieldInfo fieldInfo, + PostingsWriterBase postingsWriter) + { + this.fieldInfo = fieldInfo; + this.fieldIndexWriter = fieldIndexWriter; + pendingTerms = new TermEntry[32]; + for(int i=0;i getComparator() { + return termComp; + } + + @Override + public PostingsConsumer startTerm(BytesRef text) throws IOException { + //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text); + postingsWriter.startTerm(); + return postingsWriter; + } + + private final BytesRef lastPrevTerm = new BytesRef(); + + @Override + public void finishTerm(BytesRef text, TermStats stats) throws IOException { + + assert stats.docFreq > 0; + //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq); + + final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats); + + if (isIndexTerm) { + if (pendingCount > 0) { + // Instead of writing each term, live, we gather terms + // in RAM in a pending buffer, and then write the + // entire block in between index terms: + flushBlock(); + } + fieldIndexWriter.add(text, stats, out.getFilePointer()); + } + + if (pendingTerms.length == pendingCount) { + final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(pendingTerms, 0, newArray, 0, pendingCount); + for(int i=pendingCount;i 0) { + flushBlock(); + } + // EOF marker: + out.writeVInt(0); + + this.sumTotalTermFreq = sumTotalTermFreq; + fieldIndexWriter.finish(out.getFilePointer()); + } + + private int sharedPrefix(BytesRef term1, BytesRef term2) { + assert term1.offset == 0; + assert term2.offset == 0; + int pos1 = 0; + int pos1End = pos1 + Math.min(term1.length, term2.length); + int pos2 = 0; + while(pos1 < pos1End) { + if (term1.bytes[pos1] != term2.bytes[pos2]) { + return pos1; + } + pos1++; + pos2++; + } + return pos1; + } + + private final RAMOutputStream bytesWriter = new RAMOutputStream(); + + private void flushBlock() throws IOException { + //System.out.println("BTW.flushBlock pendingCount=" + pendingCount); + + // First pass: compute common prefix for all terms + // in the block, against term before first term in + // this block: + int commonPrefix = sharedPrefix(lastPrevTerm, pendingTerms[0].term); + for(int termCount=1;termCount 0; i--) { // read segmentInfos - infos.add(new SegmentInfo(directory, format, input, codecs)); + SegmentInfo si = new SegmentInfo(directory, format, input, codecs); + if (si.getVersion() == null) { + // Could be a 3.0 - try to open the doc stores - if it fails, it's a + // 
2.x segment, and an IndexFormatTooOldException will be thrown, + // which is what we want. + Directory dir = directory; + if (si.getDocStoreOffset() != -1) { + if (si.getDocStoreIsCompoundFile()) { + dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName( + si.getDocStoreSegment(), "", + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024); + } + } else if (si.getUseCompoundFile()) { + dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName( + si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024); + } + + try { + FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment()); + } finally { + // If we opened the directory, close it + if (dir != directory) dir.close(); + } + + // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next + // time the segment is read, its version won't be null and we won't + // need to open FieldsReader every time for each such segment. + si.setVersion("3.0"); + } else if (si.getVersion().equals("2.x")) { + // If it's a 3x index touched by 3.1+ code, then segments record their + // version, whether they are 2.x ones or not. We detect that and throw + // appropriate exception. + throw new IndexFormatTooOldException(si.name, si.getVersion()); + } + infos.add(si); } infos.userData = input.readStringStringMap(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java index c89fe948072..f034a412f52 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java @@ -38,9 +38,12 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter { /** Each segment records whether it has term vectors */ public static final int FORMAT_HAS_VECTORS = -10; + /** Each segment records the Lucene version that created it. */ + public static final int FORMAT_3_1 = -11; + /** Each segment records whether its postings are written * in the new flex format */ - public static final int FORMAT_4_0 = -11; + public static final int FORMAT_4_0 = -12; /** This must always point to the most recent file format. * whenever you add a new format, make it 1 smaller (negative version logic)! */ diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java deleted file mode 100644 index 3785c40948b..00000000000 --- a/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.apache.lucene.index.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; - -import java.io.IOException; - -final class DeltaBytesWriter { - - // Must be bigger than - // DocumentsWriter.MAX_TERM_LENGTH_UTF8. If you change - // this it's an index format change, so that change must be - // versioned: - final static int TERM_EOF = BYTE_BLOCK_SIZE; - - private byte[] lastBytes = new byte[10]; - private int lastLength; - final IndexOutput out; - - DeltaBytesWriter(IndexOutput out) { - this.out = out; - } - - void reset() { - lastLength = 0; - } - - void write(BytesRef text) throws IOException { - int start = 0; - int upto = text.offset; - final int length = text.length; - final byte[] bytes = text.bytes; - - final int limit = length < lastLength ? length : lastLength; - while(start < limit) { - if (bytes[upto] != lastBytes[start]) { - break; - } - start++; - upto++; - } - - final int suffix = length - start; - out.writeVInt(start); // prefix - out.writeVInt(suffix); // suffix - out.writeBytes(bytes, upto, suffix); - if (lastBytes.length < length) { - lastBytes = ArrayUtil.grow(lastBytes, length); - } - // TODO: is this copy really necessary? I don't think - // caller actually modifies these bytes, so we can save - // by reference? - System.arraycopy(bytes, upto, lastBytes, start, suffix); - lastLength = length; - } -} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java index c335dc6fcff..c4350694cb0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java @@ -44,7 +44,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { // number of places to multiply out the actual ord, and we // will overflow int during those multiplies. 
So to avoid // having to upgrade each multiple to long in multiple - // places (error proned), we use long here: + // places (error prone), we use long here: private long totalIndexInterval; private int indexDivisor; @@ -94,6 +94,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { // Read directory final int numFields = in.readVInt(); + //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields); for(int i=0;i 0) { loadTermsIndex(); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java index 152181557ee..1331ebf7879 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java @@ -53,7 +53,6 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { private final List fields = new ArrayList(); private final FieldInfos fieldInfos; // unread - private IndexOutput termsOut; public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException { final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION); @@ -71,13 +70,9 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { } @Override - public void setTermsOutput(IndexOutput termsOut) { - this.termsOut = termsOut; - } - - @Override - public FieldWriter addField(FieldInfo field) { - SimpleFieldWriter writer = new SimpleFieldWriter(field); + public FieldWriter addField(FieldInfo field, long termsFilePointer) { + //System.out.println("FGW: addFfield=" + field.name); + SimpleFieldWriter writer = new SimpleFieldWriter(field, termsFilePointer); fields.add(writer); return writer; } @@ -119,44 +114,19 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { private final BytesRef lastTerm = new BytesRef(); - SimpleFieldWriter(FieldInfo fieldInfo) { + SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) { this.fieldInfo = fieldInfo; indexStart = out.getFilePointer(); - termsStart = lastTermsPointer = termsOut.getFilePointer(); + termsStart = lastTermsPointer = termsFilePointer; termLengths = new short[0]; termsPointerDeltas = new int[0]; } @Override - public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException { + public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException { // First term is first indexed term: + //System.out.println("FGW: checkIndexTerm text=" + text.utf8ToString()); if (0 == (numTerms++ % termIndexInterval)) { - - final int indexedTermLength = indexedTermPrefixLength(lastTerm, text); - - // write only the min prefix that shows the diff - // against prior term - out.writeBytes(text.bytes, text.offset, indexedTermLength); - - if (termLengths.length == numIndexTerms) { - termLengths = ArrayUtil.grow(termLengths); - } - if (termsPointerDeltas.length == numIndexTerms) { - termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas); - } - - // save delta terms pointer - final long fp = termsOut.getFilePointer(); - termsPointerDeltas[numIndexTerms] = (int) (fp - lastTermsPointer); - lastTermsPointer = fp; - - // save term length (in bytes) - assert indexedTermLength <= Short.MAX_VALUE; - termLengths[numIndexTerms] = (short) indexedTermLength; - totTermLength += indexedTermLength; - - lastTerm.copy(text); - numIndexTerms++; return true; } else { if (0 == numTerms % termIndexInterval) { @@ -169,13 +139,41 @@ 
public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { } @Override - public void finish() throws IOException { + public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException { + final int indexedTermLength = indexedTermPrefixLength(lastTerm, text); + //System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer); + + // write only the min prefix that shows the diff + // against prior term + out.writeBytes(text.bytes, text.offset, indexedTermLength); + + if (termLengths.length == numIndexTerms) { + termLengths = ArrayUtil.grow(termLengths); + } + if (termsPointerDeltas.length == numIndexTerms) { + termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas); + } + + // save delta terms pointer + termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer); + lastTermsPointer = termsFilePointer; + + // save term length (in bytes) + assert indexedTermLength <= Short.MAX_VALUE; + termLengths[numIndexTerms] = (short) indexedTermLength; + totTermLength += indexedTermLength; + + lastTerm.copy(text); + numIndexTerms++; + } + + @Override + public void finish(long termsFilePointer) throws IOException { // write primary terms dict offsets packedIndexStart = out.getFilePointer(); - final long maxValue = termsOut.getFilePointer(); - PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(maxValue)); + PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer)); // relative to our indexStart long upto = 0; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java b/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java index cfc8c749a3f..ad29d1c9b1d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java @@ -17,13 +17,16 @@ package org.apache.lucene.index.codecs; * limitations under the License. */ +import java.util.List; + import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor; import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; -import java.util.List; /** Holds common state used during segment merging * @@ -37,6 +40,7 @@ public class MergeState { public int[] docBase; // New docID base per reader public int mergedDocCount; // Total # merged docs public Bits multiDeletedDocs; + public CheckAbort checkAbort; // Updated per field; public FieldInfo fieldInfo; @@ -45,5 +49,30 @@ public class MergeState { public boolean hasPayloadProcessorProvider; public DirPayloadProcessor[] dirPayloadProcessor; public PayloadProcessor[] currentPayloadProcessor; - + + public static class CheckAbort { + private double workCount; + private MergePolicy.OneMerge merge; + private Directory dir; + public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { + this.merge = merge; + this.dir = dir; + } + + /** + * Records the fact that roughly units amount of work + * have been done since this method was last called. + * When adding time-consuming code into SegmentMerger, + * you should test different values for units to ensure + * that the time in between calls to merge.checkAborted + * is up to ~ 1 second. 
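The new MergeState.CheckAbort helper batches abort checks by accumulated work units instead of consulting the abort flag on every document. A minimal standalone sketch of the same accounting idea, with hypothetical names (the real class throws MergePolicy.MergeAbortedException and calls merge.checkAborted(dir); a plain flag and RuntimeException stand in here):

    import java.util.concurrent.atomic.AtomicBoolean;

    final class WorkBatchedAbort {
        private final AtomicBoolean aborted; // set by another thread to request cancellation
        private double workCount;

        WorkBatchedAbort(AtomicBoolean aborted) {
            this.aborted = aborted;
        }

        // Record roughly `units` of work; only consult the abort flag every ~10000 units,
        // so the check stays cheap even inside tight per-document merge loops.
        void work(double units) {
            workCount += units;
            if (workCount >= 10000.0) {
                if (aborted.get()) {
                    throw new RuntimeException("merge aborted");
                }
                workCount = 0;
            }
        }
    }

The caller picks unit sizes so that roughly one second of work elapses between checks, which is exactly how TermsConsumer.merge below feeds checkAbort.work(sumDF/5.0) once sumDF exceeds 60000.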
+ */ + public void work(double units) throws MergePolicy.MergeAbortedException { + workCount += units; + if (workCount >= 10000.0) { + merge.checkAborted(dir); + workCount = 0; + } + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java index 0f65c818b16..b75aa478a5a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java @@ -172,6 +172,8 @@ public abstract class MultiLevelSkipListReader { public void init(long skipPointer, int df) { this.skipPointer[0] = skipPointer; this.docCount = df; + assert skipPointer >= 0 && skipPointer <= skipStream[0].length() + : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length(); Arrays.fill(skipDoc, 0); Arrays.fill(numSkipped, 0); Arrays.fill(childPointer, 0); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java index a6bd46fe82a..b5c2c8bfa81 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java @@ -30,9 +30,9 @@ import org.apache.lucene.util.BytesRef; public abstract class PostingsConsumer { - /** Adds a new doc in this term. Return null if this - * consumer doesn't need to see the positions for this - * doc. */ + /** Adds a new doc in this term. If this field omits term + * freqs & positions then termDocFreq should be ignored, + * and, finishDoc will not be called. */ public abstract void startDoc(int docID, int termDocFreq) throws IOException; public static class PostingsMergeState { @@ -49,14 +49,16 @@ public abstract class PostingsConsumer { public abstract void addPosition(int position, BytesRef payload) throws IOException; /** Called when we are done adding positions & payloads - * for each doc */ + * for each doc. Not called when the field omits term + * freq and positions. */ public abstract void finishDoc() throws IOException; /** Default merge impl: append documents, mapping around * deletes */ - public int merge(final MergeState mergeState, final DocsEnum postings) throws IOException { + public TermStats merge(final MergeState mergeState, final DocsEnum postings) throws IOException { int df = 0; + long totTF = 0; if (mergeState.fieldInfo.omitTermFreqAndPositions) { while(true) { @@ -67,6 +69,7 @@ public abstract class PostingsConsumer { this.startDoc(doc, postings.freq()); this.finishDoc(); df++; + totTF++; } } else { final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings; @@ -77,6 +80,7 @@ public abstract class PostingsConsumer { } final int freq = postingsEnum.freq(); this.startDoc(doc, freq); + totTF += freq; for(int i=0;iThis class also interacts with an instance of {@link - * TermsIndexReaderBase}, to abstract away the specific - * implementation of the terms dict index. 
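The terms dictionary handled by this reader stores each term as a shared-prefix length plus the remaining suffix bytes, the same delta coding visible earlier in this patch in sharedPrefix()/flushBlock() and in the removed DeltaBytesWriter. A minimal sketch of that encoding over a sorted term list, using hypothetical helper names and a plain ByteArrayOutputStream instead of Lucene's IndexOutput:

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    final class PrefixDeltaSketch {
        // Length of the common prefix of two byte arrays.
        static int sharedPrefix(byte[] a, byte[] b) {
            final int limit = Math.min(a.length, b.length);
            int i = 0;
            while (i < limit && a[i] == b[i]) {
                i++;
            }
            return i;
        }

        // Writes each term as: vInt(prefixLen), vInt(suffixLen), suffix bytes.
        // Because the input is sorted, consecutive terms share long prefixes and
        // the suffixes stay short.
        static byte[] encode(String[] sortedTerms) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] prev = new byte[0];
            for (String term : sortedTerms) {
                byte[] cur = term.getBytes(StandardCharsets.UTF_8);
                int prefix = sharedPrefix(prev, cur);
                writeVInt(out, prefix);
                writeVInt(out, cur.length - prefix);
                out.write(cur, prefix, cur.length - prefix);
                prev = cur;
            }
            return out.toByteArray();
        }

        private static void writeVInt(ByteArrayOutputStream out, int value) {
            while ((value & ~0x7F) != 0) {
                out.write((value & 0x7F) | 0x80);
                value >>>= 7;
            }
            out.write(value);
        }
    }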
- * @lucene.experimental */ - -public class PrefixCodedTermsReader extends FieldsProducer { - // Open input to the main terms dict file (_X.tis) - private final IndexInput in; - - // Reads the terms dict entries, to gather state to - // produce DocsEnum on demand - private final PostingsReaderBase postingsReader; - - private final TreeMap fields = new TreeMap(); - - // Comparator that orders our terms - private final Comparator termComp; - - // Caches the most recently looked-up field + terms: - private final DoubleBarrelLRUCache termsCache; - - // Reads the terms index - private TermsIndexReaderBase indexReader; - - // keeps the dirStart offset - protected long dirOffset; - - // Used as key for the terms cache - private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey { - String field; - BytesRef term; - - public FieldAndTerm() { - } - - public FieldAndTerm(String field, BytesRef term) { - this.field = field; - this.term = new BytesRef(term); - } - - public FieldAndTerm(FieldAndTerm other) { - field = other.field; - term = new BytesRef(other.term); - } - - @Override - public boolean equals(Object _other) { - FieldAndTerm other = (FieldAndTerm) _other; - return other.field == field && term.bytesEquals(other.term); - } - - @Override - public Object clone() { - return new FieldAndTerm(this); - } - - @Override - public int hashCode() { - return field.hashCode() * 31 + term.hashCode(); - } - } - - public PrefixCodedTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize, - Comparator termComp, int termsCacheSize, String codecId) - throws IOException { - - this.postingsReader = postingsReader; - termsCache = new DoubleBarrelLRUCache(termsCacheSize); - - this.termComp = termComp; - - in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, PrefixCodedTermsWriter.TERMS_EXTENSION), - readBufferSize); - - boolean success = false; - try { - readHeader(in); - - // Have PostingsReader init itself - postingsReader.init(in); - - // Read per-field details - seekDir(in, dirOffset); - - final int numFields = in.readInt(); - - for(int i=0;i= 0; - final long termsStartPointer = in.readLong(); - final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - if (numTerms > 0) { - assert !fields.containsKey(fieldInfo.name); - fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer)); - } - } - success = true; - } finally { - if (!success) { - in.close(); - } - } - - this.indexReader = indexReader; - } - - protected void readHeader(IndexInput input) throws IOException { - CodecUtil.checkHeader(in, PrefixCodedTermsWriter.CODEC_NAME, - PrefixCodedTermsWriter.VERSION_START, PrefixCodedTermsWriter.VERSION_CURRENT); - dirOffset = in.readLong(); - } - - protected void seekDir(IndexInput input, long dirOffset) - throws IOException { - input.seek(dirOffset); - } - - @Override - public void loadTermsIndex(int indexDivisor) throws IOException { - indexReader.loadTermsIndex(indexDivisor); - } - - @Override - public void close() throws IOException { - try { - try { - if (indexReader != null) { - indexReader.close(); - } - } finally { - // null so if an app hangs on to us (ie, we are not - // GCable, despite being closed) we still free most - // ram - indexReader = null; - if (in != null) { - in.close(); - } - } - } finally { - try { - if (postingsReader != null) { - postingsReader.close(); - } - } finally { - for(FieldReader field : fields.values()) { - field.close(); - } - } 
- } - } - - public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection files) { - files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, PrefixCodedTermsWriter.TERMS_EXTENSION)); - } - - public static void getExtensions(Collection extensions) { - extensions.add(PrefixCodedTermsWriter.TERMS_EXTENSION); - } - - @Override - public FieldsEnum iterator() { - return new TermFieldsEnum(); - } - - @Override - public Terms terms(String field) throws IOException { - return fields.get(field); - } - - // Iterates through all fields - private class TermFieldsEnum extends FieldsEnum { - final Iterator it; - FieldReader current; - - TermFieldsEnum() { - it = fields.values().iterator(); - } - - @Override - public String next() { - if (it.hasNext()) { - current = it.next(); - return current.fieldInfo.name; - } else { - current = null; - return null; - } - } - - @Override - public TermsEnum terms() throws IOException { - return current.iterator(); - } - - @Override - public DocValues docValues() throws IOException { - // TODO Auto-generated method stub - return null; - } - } - - private class FieldReader extends Terms implements Closeable { - final long numTerms; - final FieldInfo fieldInfo; - final long termsStartPointer; - - FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer) { - assert numTerms > 0; - this.fieldInfo = fieldInfo; - this.numTerms = numTerms; - this.termsStartPointer = termsStartPointer; - } - - @Override - public Comparator getComparator() { - return termComp; - } - - @Override - public void close() { - super.close(); - } - - @Override - public TermsEnum iterator() throws IOException { - return new SegmentTermsEnum(); - } - - @Override - public long getUniqueTermCount() { - return numTerms; - } - - // Iterates through terms in this field, not supporting ord() - private class SegmentTermsEnum extends TermsEnum { - private final IndexInput in; - private final DeltaBytesReader bytesReader; - private final TermState state; - private boolean seekPending; - private final FieldAndTerm fieldTerm = new FieldAndTerm(); - private final TermsIndexReaderBase.FieldIndexEnum indexEnum; - private boolean positioned; - private boolean didIndexNext; - private BytesRef nextIndexTerm; - private boolean isIndexTerm; - private final boolean doOrd; - - SegmentTermsEnum() throws IOException { - in = (IndexInput) PrefixCodedTermsReader.this.in.clone(); - in.seek(termsStartPointer); - indexEnum = indexReader.getFieldEnum(fieldInfo); - doOrd = indexReader.supportsOrd(); - bytesReader = new DeltaBytesReader(in); - fieldTerm.field = fieldInfo.name; - state = postingsReader.newTermState(); - state.ord = -1; - } - - @Override - public Comparator getComparator() { - return termComp; - } - - @Override - public void cacheCurrentTerm() { - TermState stateCopy = (TermState) state.clone(); - stateCopy.filePointer = in.getFilePointer(); - termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term), - stateCopy); - } - - // called only from assert - private boolean first; - private int indexTermCount; - - private boolean startSeek() { - first = true; - indexTermCount = 0; - return true; - } - - private boolean checkSeekScan() { - if (!first && isIndexTerm) { - indexTermCount++; - if (indexTermCount >= indexReader.getDivisor()) { - //System.out.println("now fail count=" + indexTermCount); - return false; - } - } - first = false; - return true; - } - - /** Seeks until the first term that's >= the provided - * text; returns SeekStatus.FOUND if the exact term - * is 
found, SeekStatus.NOT_FOUND if a different term - * was found, SeekStatus.END if we hit EOF */ - @Override - public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { - - if (indexEnum == null) { - throw new IllegalStateException("terms index was not loaded"); - } - - //System.out.println("te.seek term=" + fieldInfo.name + ":" + term.utf8ToString() + " current=" + term().utf8ToString() + " useCache=" + useCache + " this=" + this); - - // Check cache - fieldTerm.term = term; - TermState cachedState; - if (useCache) { - cachedState = termsCache.get(fieldTerm); - if (cachedState != null) { - state.copy(cachedState); - seekPending = true; - positioned = false; - bytesReader.term.copy(term); - //System.out.println(" cached!"); - return SeekStatus.FOUND; - } - } else { - cachedState = null; - } - - boolean doSeek = true; - - if (positioned) { - - final int cmp = termComp.compare(bytesReader.term, term); - - if (cmp == 0) { - // already at the requested term - return SeekStatus.FOUND; - } else if (cmp < 0) { - - if (seekPending) { - seekPending = false; - in.seek(state.filePointer); - indexEnum.seek(bytesReader.term); - didIndexNext = false; - } - - // Target term is after current term - if (!didIndexNext) { - if (indexEnum.next() == -1) { - nextIndexTerm = null; - } else { - nextIndexTerm = indexEnum.term(); - } - //System.out.println(" now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString())); - didIndexNext = true; - } - - if (nextIndexTerm == null || termComp.compare(term, nextIndexTerm) < 0) { - // Optimization: requested term is within the - // same index block we are now in; skip seeking - // (but do scanning): - doSeek = false; - //System.out.println(" skip seek: nextIndexTerm=" + nextIndexTerm); - } - } - } - - if (doSeek) { - - positioned = true; - - // Ask terms index to find biggest index term that's <= - // our text: - in.seek(indexEnum.seek(term)); - didIndexNext = false; - if (doOrd) { - state.ord = indexEnum.ord()-1; - } - seekPending = false; - - // NOTE: the first next() after an index seek is - // wasteful, since it redundantly reads the same - // bytes into the buffer. We could avoid storing - // those bytes in the primary file, but then when - // scanning over an index term we'd have to - // special case it: - bytesReader.reset(indexEnum.term()); - //System.out.println(" doSeek term=" + indexEnum.term().utf8ToString() + " vs target=" + term.utf8ToString()); - } else { - //System.out.println(" skip seek"); - } - - assert startSeek(); - - // Now scan: - while (next() != null) { - final int cmp = termComp.compare(bytesReader.term, term); - if (cmp == 0) { - // Done! - if (useCache) { - // Store in cache - FieldAndTerm entryKey = new FieldAndTerm(fieldTerm); - cachedState = (TermState) state.clone(); - // this is fp after current term - cachedState.filePointer = in.getFilePointer(); - termsCache.put(entryKey, cachedState); - } - - return SeekStatus.FOUND; - } else if (cmp > 0) { - return SeekStatus.NOT_FOUND; - } - - // The purpose of the terms dict index is to seek - // the enum to the closest index term before the - // term we are looking for. 
So, we should never - // cross another index term (besides the first - // one) while we are scanning: - assert checkSeekScan(); - } - - positioned = false; - return SeekStatus.END; - } - - @Override - public BytesRef term() { - return bytesReader.term; - } - - @Override - public BytesRef next() throws IOException { - - if (seekPending) { - seekPending = false; - in.seek(state.filePointer); - indexEnum.seek(bytesReader.term); - didIndexNext = false; - } - - if (!bytesReader.read()) { - //System.out.println("te.next end!"); - positioned = false; - return null; - } - - final byte b = in.readByte(); - isIndexTerm = (b & 0x80) != 0; - - if ((b & 0x40) == 0) { - // Fast case -- docFreq fits in 6 bits - state.docFreq = b & 0x3F; - } else { - state.docFreq = (in.readVInt() << 6) | (b & 0x3F); - } - - postingsReader.readTerm(in, - fieldInfo, state, - isIndexTerm); - state.ord++; - positioned = true; - - //System.out.println("te.next term=" + bytesReader.term.utf8ToString()); - return bytesReader.term; - } - - @Override - public int docFreq() { - return state.docFreq; - } - - @Override - public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); - assert docsEnum != null; - return docsEnum; - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { - if (fieldInfo.omitTermFreqAndPositions) { - return null; - } else { - return postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); - } - } - - @Override - public SeekStatus seek(long ord) throws IOException { - - if (indexEnum == null) { - throw new IllegalStateException("terms index was not loaded"); - } - - if (ord >= numTerms) { - state.ord = numTerms-1; - return SeekStatus.END; - } - - in.seek(indexEnum.seek(ord)); - seekPending = false; - positioned = true; - - // NOTE: the first next() after an index seek is - // wasteful, since it redundantly reads the same - // bytes into the buffer - bytesReader.reset(indexEnum.term()); - - state.ord = indexEnum.ord()-1; - assert state.ord >= -1: "ord=" + state.ord; - - // Now, scan: - int left = (int) (ord - state.ord); - while(left > 0) { - final BytesRef term = next(); - assert term != null; - left--; - } - - // always found - return SeekStatus.FOUND; - } - - @Override - public long ord() { - if (!doOrd) { - throw new UnsupportedOperationException(); - } - return state.ord; - } - } - } -} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java deleted file mode 100644 index 377e3e55647..00000000000 --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java +++ /dev/null @@ -1,206 +0,0 @@ -package org.apache.lucene.index.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Comparator; - -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.CodecUtil; - -/** - * Writes terms dict and interacts with docs/positions - * consumers to write the postings files. - * - * The [new] terms dict format is field-centric: each field - * has its own section in the file. Fields are written in - * UTF16 string comparison order. Within each field, each - * term's text is written in UTF16 string comparison order. - * @lucene.experimental - */ - -public class PrefixCodedTermsWriter extends FieldsConsumer { - - final static String CODEC_NAME = "STANDARD_TERMS_DICT"; - - // Initial format - public static final int VERSION_START = 0; - - public static final int VERSION_CURRENT = VERSION_START; - - /** Extension of terms file */ - static final String TERMS_EXTENSION = "tis"; - - private final DeltaBytesWriter termWriter; - - protected final IndexOutput out; - final PostingsWriterBase postingsWriter; - final FieldInfos fieldInfos; - FieldInfo currentField; - private final TermsIndexWriterBase termsIndexWriter; - private final List fields = new ArrayList(); - private final Comparator termComp; - - public PrefixCodedTermsWriter( - TermsIndexWriterBase termsIndexWriter, - SegmentWriteState state, - PostingsWriterBase postingsWriter, - Comparator termComp) throws IOException - { - final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION); - this.termsIndexWriter = termsIndexWriter; - this.termComp = termComp; - out = state.directory.createOutput(termsFileName); - termsIndexWriter.setTermsOutput(out); - - fieldInfos = state.fieldInfos; - writeHeader(out); - termWriter = new DeltaBytesWriter(out); - currentField = null; - this.postingsWriter = postingsWriter; - - postingsWriter.start(out); // have consumer write its format/header - } - - protected void writeHeader(IndexOutput out) throws IOException { - // Count indexed fields up front - CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); - - out.writeLong(0); // leave space for end index pointer - } - - @Override - public TermsConsumer addField(FieldInfo field) throws IOException { - assert currentField == null || currentField.name.compareTo(field.name) < 0 : "current field name " + (currentField == null? 
null: currentField.name) + " given: " +field.name; - currentField = field; - TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field); - TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); - fields.add(terms); - return terms; - } - - @Override - public void close() throws IOException { - - try { - final int fieldCount = fields.size(); - - final long dirStart = out.getFilePointer(); - - out.writeInt(fieldCount); - for(int i=0;i getComparator() { - return termComp; - } - - @Override - public PostingsConsumer startTerm(BytesRef text) throws IOException { - postingsWriter.startTerm(); - return postingsWriter; - } - - @Override - public void finishTerm(BytesRef text, int numDocs) throws IOException { - - assert numDocs > 0; - //System.out.println("finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " fp=" + out.getFilePointer()); - - final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs); - - termWriter.write(text); - final int highBit = isIndexTerm ? 0x80 : 0; - //System.out.println(" isIndex=" + isIndexTerm); - - // This is a vInt, except, we steal top bit to record - // whether this was an indexed term: - if ((numDocs & ~0x3F) == 0) { - // Fast case -- docFreq fits in 6 bits - out.writeByte((byte) (highBit | numDocs)); - } else { - // Write bottom 6 bits of docFreq, then write the - // remainder as vInt: - out.writeByte((byte) (highBit | 0x40 | (numDocs & 0x3F))); - out.writeVInt(numDocs >>> 6); - } - postingsWriter.finishTerm(numDocs, isIndexTerm); - numTerms++; - } - - // Finishes all terms in this field - @Override - public void finish() throws IOException { - // EOF marker: - out.writeVInt(DeltaBytesWriter.TERM_EOF); - fieldIndexWriter.finish(); - } - } -} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java b/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java similarity index 50% rename from lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java rename to lucene/src/java/org/apache/lucene/index/codecs/TermStats.java index 0514dad96a7..bb2b6f34d27 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java @@ -17,36 +17,12 @@ package org.apache.lucene.index.codecs; * limitations under the License. 
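The removed finishTerm() above steals the top two bits of the first byte it writes per term: 0x80 marks an index term and 0x40 marks that docFreq does not fit in the remaining six bits. A small round-trip sketch of that layout, with hypothetical names and a plain ByteBuffer in place of Lucene's IndexOutput/IndexInput:

    import java.nio.ByteBuffer;

    final class DocFreqByteSketch {
        // byte = [indexTerm bit][overflow bit][low 6 bits of docFreq], then vInt(docFreq >>> 6) if needed.
        static void write(ByteBuffer out, int docFreq, boolean isIndexTerm) {
            final int highBit = isIndexTerm ? 0x80 : 0;
            if ((docFreq & ~0x3F) == 0) {
                out.put((byte) (highBit | docFreq));                  // fast path: fits in 6 bits
            } else {
                out.put((byte) (highBit | 0x40 | (docFreq & 0x3F)));  // low 6 bits here
                writeVInt(out, docFreq >>> 6);                        // remainder as vInt
            }
        }

        // Decode returns docFreq; the index-term flag is (first byte & 0x80) != 0.
        static int readDocFreq(ByteBuffer in) {
            final int b = in.get() & 0xFF;
            if ((b & 0x40) == 0) {
                return b & 0x3F;
            }
            return (readVInt(in) << 6) | (b & 0x3F);
        }

        private static void writeVInt(ByteBuffer out, int value) {
            while ((value & ~0x7F) != 0) {
                out.put((byte) ((value & 0x7F) | 0x80));
                value >>>= 7;
            }
            out.put((byte) value);
        }

        private static int readVInt(ByteBuffer in) {
            int b = in.get() & 0xFF;
            int value = b & 0x7F;
            int shift = 7;
            while ((b & 0x80) != 0) {
                b = in.get() & 0xFF;
                value |= (b & 0x7F) << shift;
                shift += 7;
            }
            return value;
        }
    }

The decode mirrors the reader removed earlier in this patch, which reconstructs docFreq as (readVInt() << 6) | (b & 0x3F) when the 0x40 bit is set.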
*/ -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.BytesRef; +public class TermStats { + public final int docFreq; + public final long totalTermFreq; -import java.io.IOException; - -// Handles reading incremental UTF8 encoded terms -final class DeltaBytesReader { - final BytesRef term = new BytesRef(); - final IndexInput in; - - DeltaBytesReader(IndexInput in) { - this.in = in; - term.bytes = new byte[10]; - } - - void reset(BytesRef text) { - term.copy(text); - } - - boolean read() throws IOException { - final int start = in.readVInt(); - if (start == DeltaBytesWriter.TERM_EOF) { - return false; - } - final int suffix = in.readVInt(); - assert start <= term.length: "start=" + start + " length=" + term.length; - final int newLength = start+suffix; - term.grow(newLength); - in.readBytes(term.bytes, start, suffix); - term.length = newLength; - return true; + public TermStats(int docFreq, long totalTermFreq) { + this.docFreq = docFreq; + this.totalTermFreq = totalTermFreq; } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java index 48fc7e01660..93b578ce17c 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java @@ -38,10 +38,10 @@ public abstract class TermsConsumer { public abstract PostingsConsumer startTerm(BytesRef text) throws IOException; /** Finishes the current term; numDocs must be > 0. */ - public abstract void finishTerm(BytesRef text, int numDocs) throws IOException; + public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException; /** Called when we are done adding terms to this field */ - public abstract void finish() throws IOException; + public abstract void finish(long sumTotalTermFreq) throws IOException; /** Return the BytesRef Comparator used to sort terms * before feeding to this API. 
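With the new signatures, finishTerm() reports a per-term TermStats and finish() receives the field-level sumTotalTermFreq that the merge path accumulates from those stats. A minimal sketch of that accumulation, assuming a hypothetical list of per-term stats rather than a live TermsEnum:

    import java.util.List;

    final class FieldStatsSketch {
        // Mirrors the new TermStats value class: per-term docFreq and totalTermFreq.
        static final class TermStats {
            final int docFreq;
            final long totalTermFreq;
            TermStats(int docFreq, long totalTermFreq) {
                this.docFreq = docFreq;
                this.totalTermFreq = totalTermFreq;
            }
        }

        // Sums the per-term statistics that finishTerm() would see into the
        // field total handed to finish(long sumTotalTermFreq).
        static long sumTotalTermFreq(List<TermStats> perTerm) {
            long sum = 0;
            for (TermStats stats : perTerm) {
                if (stats.docFreq > 0) {   // terms whose docs were all deleted are skipped
                    sum += stats.totalTermFreq;
                }
            }
            return sum;
        }
    }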
*/ @@ -55,6 +55,8 @@ public abstract class TermsConsumer { BytesRef term; assert termsEnum != null; + long sumTotalTermFreq = 0; + long sumDF = 0; if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (docsEnum == null) { @@ -69,9 +71,14 @@ public abstract class TermsConsumer { if (docsEnumIn != null) { docsEnum.reset(docsEnumIn); final PostingsConsumer postingsConsumer = startTerm(term); - final int numDocs = postingsConsumer.merge(mergeState, docsEnum); - if (numDocs > 0) { - finishTerm(term, numDocs); + final TermStats stats = postingsConsumer.merge(mergeState, docsEnum); + if (stats.docFreq > 0) { + finishTerm(term, stats); + sumDF += stats.docFreq; + if (sumDF > 60000) { + mergeState.checkAbort.work(sumDF/5.0); + sumDF = 0; + } } } } @@ -94,14 +101,20 @@ public abstract class TermsConsumer { } } final PostingsConsumer postingsConsumer = startTerm(term); - final int numDocs = postingsConsumer.merge(mergeState, postingsEnum); - if (numDocs > 0) { - finishTerm(term, numDocs); + final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum); + if (stats.docFreq > 0) { + finishTerm(term, stats); + sumTotalTermFreq += stats.totalTermFreq; + sumDF += stats.docFreq; + if (sumDF > 60000) { + mergeState.checkAbort.work(sumDF/5.0); + sumDF = 0; + } } } } } - finish(); + finish(sumTotalTermFreq); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java b/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java index e74cd1a52d0..53f1a7e7d81 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java @@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs; * limitations under the License. */ -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -25,14 +24,13 @@ import java.io.IOException; /** @lucene.experimental */ public abstract class TermsIndexWriterBase { - public abstract void setTermsOutput(IndexOutput out); - public abstract class FieldWriter { - public abstract boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException; - public abstract void finish() throws IOException; + public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException; + public abstract void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException; + public abstract void finish(long termsFilePointer) throws IOException; } - public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException; + public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException; public abstract void close() throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java index 60ca441c51f..68ec78ab023 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java @@ -164,9 +164,6 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { this.fieldInfo = fieldInfo; this.indexStart = indexStart; - // We still create the indexReader when indexDivisor - // is -1, so that PrefixCodedTermsReader can call - // isIndexTerm for each field: if (indexDivisor > 0) { loadTermsIndex(); } diff --git 
a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java index 12195e813ae..e4cba764738 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java @@ -52,14 +52,14 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { private final List fields = new ArrayList(); private final FieldInfos fieldInfos; // unread - private IndexOutput termsOut; private final IndexTermSelector policy; /** @lucene.experimental */ public static abstract class IndexTermSelector { // Called sequentially on every term being written, // returning true if this term should be indexed - public abstract boolean isIndexTerm(BytesRef term, int docFreq); + public abstract boolean isIndexTerm(BytesRef term, TermStats stats); + public abstract void newField(FieldInfo fieldInfo); } /** Same policy as {@link FixedGapTermsIndexWriter} */ @@ -74,15 +74,20 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { } @Override - public boolean isIndexTerm(BytesRef term, int docFreq) { + public boolean isIndexTerm(BytesRef term, TermStats stats) { if (count >= interval) { - count = 0; + count = 1; return true; } else { count++; return false; } } + + @Override + public void newField(FieldInfo fieldInfo) { + count = interval; + } } /** Sets an index term when docFreq >= docFreqThresh, or @@ -96,18 +101,26 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { public EveryNOrDocFreqTermSelector(int docFreqThresh, int interval) { this.interval = interval; this.docFreqThresh = docFreqThresh; + + // First term is first indexed term: + count = interval; } @Override - public boolean isIndexTerm(BytesRef term, int docFreq) { - if (docFreq >= docFreqThresh || count >= interval) { - count = 0; + public boolean isIndexTerm(BytesRef term, TermStats stats) { + if (stats.docFreq >= docFreqThresh || count >= interval) { + count = 1; return true; } else { count++; return false; } } + + @Override + public void newField(FieldInfo fieldInfo) { + count = interval; + } } // TODO: it'd be nice to let the FST builder prune based @@ -158,14 +171,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { } @Override - public void setTermsOutput(IndexOutput termsOut) { - this.termsOut = termsOut; - } - - @Override - public FieldWriter addField(FieldInfo field) throws IOException { - //System.out.println("VGW: field=" + field.name); - FSTFieldWriter writer = new FSTFieldWriter(field); + public FieldWriter addField(FieldInfo field, long termsFilePointer) throws IOException { + ////System.out.println("VGW: field=" + field.name); + policy.newField(field); + FSTFieldWriter writer = new FSTFieldWriter(field, termsFilePointer); fields.add(writer); return writer; } @@ -200,42 +209,48 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { private final BytesRef lastTerm = new BytesRef(); private boolean first = true; - public FSTFieldWriter(FieldInfo fieldInfo) throws IOException { + public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException { this.fieldInfo = fieldInfo; fstOutputs = PositiveIntOutputs.getSingleton(true); fstBuilder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, fstOutputs); indexStart = out.getFilePointer(); - //System.out.println("VGW: field=" + fieldInfo.name); + ////System.out.println("VGW: field=" + 
fieldInfo.name); // Always put empty string in - fstBuilder.add(new BytesRef(), fstOutputs.get(termsOut.getFilePointer())); + fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer)); } @Override - public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException { - if (policy.isIndexTerm(text, docFreq) || first) { + public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException { + //System.out.println("VGW: index term=" + text.utf8ToString()); + // NOTE: we must force the first term per field to be + // indexed, in case policy doesn't: + if (policy.isIndexTerm(text, stats) || first) { first = false; - //System.out.println("VGW: index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer()); - final int lengthSave = text.length; - text.length = indexedTermPrefixLength(lastTerm, text); - try { - fstBuilder.add(text, fstOutputs.get(termsOut.getFilePointer())); - } finally { - text.length = lengthSave; - } - lastTerm.copy(text); + //System.out.println(" YES"); return true; } else { - //System.out.println("VGW: not index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer()); lastTerm.copy(text); return false; } } @Override - public void finish() throws IOException { + public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException { + final int lengthSave = text.length; + text.length = indexedTermPrefixLength(lastTerm, text); + try { + fstBuilder.add(text, fstOutputs.get(termsFilePointer)); + } finally { + text.length = lengthSave; + } + lastTerm.copy(text); + } + + @Override + public void finish(long termsFilePointer) throws IOException { fst = fstBuilder.finish(); if (fst != null) { fst.save(out); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java index 3fb9adcb3b6..1b6829dc28d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java @@ -24,6 +24,7 @@ package org.apache.lucene.index.codecs.intblock; import java.io.IOException; import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IntsRef; @@ -149,7 +150,7 @@ public abstract class FixedIntBlockIndexInput extends IntIndexInput { private int upto; @Override - public void read(final IndexInput indexIn, final boolean absolute) throws IOException { + public void read(final DataInput indexIn, final boolean absolute) throws IOException { if (absolute) { fp = indexIn.readVLong(); upto = indexIn.readVInt(); @@ -205,5 +206,10 @@ public abstract class FixedIntBlockIndexInput extends IntIndexInput { other.upto = upto; return other; } + + @Override + public String toString() { + return "fp=" + fp + " upto=" + upto; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java index 00658b0b5f1..8b5e4988fcd 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java @@ -111,6 +111,11 @@ public abstract class FixedIntBlockIndexOutput extends IntIndexOutput { lastUpto = upto; lastFP = fp; } + + @Override + public String toString() { + 
return "fp=" + fp + " upto=" + upto; + } } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java b/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java index 6084df41ca4..0881587d041 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java @@ -24,6 +24,7 @@ package org.apache.lucene.index.codecs.intblock; import java.io.IOException; import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IntsRef; @@ -168,7 +169,7 @@ public abstract class VariableIntBlockIndexInput extends IntIndexInput { private int upto; @Override - public void read(final IndexInput indexIn, final boolean absolute) throws IOException { + public void read(final DataInput indexIn, final boolean absolute) throws IOException { if (absolute) { fp = indexIn.readVLong(); upto = indexIn.readByte()&0xFF; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/intblock/package.html b/lucene/src/java/org/apache/lucene/index/codecs/intblock/package.html new file mode 100644 index 00000000000..403ea1b55f6 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/package.html @@ -0,0 +1,25 @@ + + + + + + + +Intblock: base support for fixed or variable length block integer encoders + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/package.html b/lucene/src/java/org/apache/lucene/index/codecs/package.html new file mode 100644 index 00000000000..78dcb95de64 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/package.html @@ -0,0 +1,25 @@ + + + + + + + +Codecs API: API for customization of the encoding and structure of the index. + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java index ec65dcf47d8..31cb23a4e58 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java @@ -269,6 +269,11 @@ public class PreFlexFields extends FieldsProducer { return BytesRef.getUTF8SortedAsUTF16Comparator(); } } + + @Override + public long getSumTotalTermFreq() { + return -1; + } } private class PreTermsEnum extends TermsEnum { @@ -540,7 +545,7 @@ public class PreFlexFields extends FieldsProducer { // We can easily detect S in UTF8: if a byte has // prefix 11110 (0xf0), then that byte and the // following 3 bytes encode a single unicode codepoint - // in S. Similary,we can detect E: if a byte has + // in S. Similarly, we can detect E: if a byte has // prefix 1110111 (0xee), then that byte and the // following 2 bytes encode a single unicode codepoint // in E. 
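The comment above relies on UTF-8 lead-byte patterns to classify codepoints without decoding them: "S" covers supplementary codepoints (4-byte sequences, lead prefix 11110) and "E" covers U+E000..U+FFFF (3-byte sequences whose lead byte is 0xEE or 0xEF). A small sketch of those two tests, with hypothetical helper names:

    final class Utf8LeadByteSketch {
        // True if b starts a 4-byte UTF-8 sequence (binary prefix 11110), i.e. a codepoint >= U+10000.
        static boolean startsSupplementary(byte b) {
            return (b & 0xF8) == 0xF0;
        }

        // True if b starts a 3-byte UTF-8 sequence for U+E000..U+FFFF (binary prefix 1110111, i.e. 0xEE or 0xEF).
        static boolean startsE000Block(byte b) {
            return (b & 0xFE) == 0xEE;
        }

        public static void main(String[] args) {
            byte[] smiley = "\uD83D\uDE00".getBytes(java.nio.charset.StandardCharsets.UTF_8); // U+1F600, 4 bytes
            byte[] privateUse = "\uE000".getBytes(java.nio.charset.StandardCharsets.UTF_8);   // U+E000, 3 bytes
            System.out.println(startsSupplementary(smiley[0]));  // true
            System.out.println(startsE000Block(privateUse[0]));  // true
        }
    }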
@@ -748,11 +753,6 @@ public class PreFlexFields extends FieldsProducer { } } - @Override - public void cacheCurrentTerm() throws IOException { - getTermsDict().cacheCurrentTerm(termEnum); - } - @Override public SeekStatus seek(long ord) throws IOException { throw new UnsupportedOperationException(); @@ -949,6 +949,11 @@ public class PreFlexFields extends FieldsProducer { return termEnum.docFreq(); } + @Override + public long totalTermFreq() { + return -1; + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { PreDocsEnum docsEnum; @@ -982,7 +987,7 @@ public class PreFlexFields extends FieldsProducer { private final class PreDocsEnum extends DocsEnum { final private SegmentTermDocs docs; - + private int docID = -1; PreDocsEnum() throws IOException { docs = new SegmentTermDocs(freqStream, getTermsDict(), fieldInfos); } @@ -1000,18 +1005,18 @@ public class PreFlexFields extends FieldsProducer { @Override public int nextDoc() throws IOException { if (docs.next()) { - return docs.doc(); + return docID = docs.doc(); } else { - return NO_MORE_DOCS; + return docID = NO_MORE_DOCS; } } @Override public int advance(int target) throws IOException { if (docs.skipTo(target)) { - return docs.doc(); + return docID = docs.doc(); } else { - return NO_MORE_DOCS; + return docID = NO_MORE_DOCS; } } @@ -1022,7 +1027,7 @@ public class PreFlexFields extends FieldsProducer { @Override public int docID() { - return docs.doc(); + return docID; } @Override @@ -1038,7 +1043,7 @@ public class PreFlexFields extends FieldsProducer { private final class PreDocsAndPositionsEnum extends DocsAndPositionsEnum { final private SegmentTermPositions pos; - + private int docID = -1; PreDocsAndPositionsEnum() throws IOException { pos = new SegmentTermPositions(freqStream, proxStream, getTermsDict(), fieldInfos); } @@ -1056,18 +1061,18 @@ public class PreFlexFields extends FieldsProducer { @Override public int nextDoc() throws IOException { if (pos.next()) { - return pos.doc(); + return docID = pos.doc(); } else { - return NO_MORE_DOCS; + return docID = NO_MORE_DOCS; } } @Override public int advance(int target) throws IOException { if (pos.skipTo(target)) { - return pos.doc(); + return docID = pos.doc(); } else { - return NO_MORE_DOCS; + return docID = NO_MORE_DOCS; } } @@ -1078,16 +1083,18 @@ public class PreFlexFields extends FieldsProducer { @Override public int docID() { - return pos.doc(); + return docID; } @Override public int nextPosition() throws IOException { + assert docID != NO_MORE_DOCS; return pos.nextPosition(); } @Override public boolean hasPayload() { + assert docID != NO_MORE_DOCS; return pos.isPayloadAvailable(); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java index a8703ae83f1..fb7c8ceec46 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java @@ -45,7 +45,7 @@ public final class SegmentTermEnum implements Cloneable { // whenever you add a new format, make it 1 smaller (negative version logic)! 
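Under this negative numbering convention, "newer" always means "numerically smaller", so a reader can range-check a stored format value against its minimum and current constants. A hedged sketch of that check, using hypothetical constants and exception types (the example values loosely follow the constants elsewhere in this patch):

    final class FormatCheckSketch {
        static final int FORMAT_MINIMUM = -10;  // oldest format this reader still understands
        static final int FORMAT_CURRENT = -12;  // newest format this reader writes

        // Rejects formats outside [FORMAT_CURRENT, FORMAT_MINIMUM]; remember smaller == newer.
        static void checkFormat(int format) {
            if (format > FORMAT_MINIMUM) {
                throw new IllegalStateException("index format " + format + " is too old (minimum " + FORMAT_MINIMUM + ")");
            }
            if (format < FORMAT_CURRENT) {
                throw new IllegalStateException("index format " + format + " is too new (current " + FORMAT_CURRENT + ")");
            }
        }
    }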
public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; - // when removing support for old versions, levae the last supported version here + // when removing support for old versions, leave the last supported version here public static final int FORMAT_MINIMUM = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; private TermBuffer termBuffer = new TermBuffer(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java index f50d226741c..c642f6b1aaa 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java @@ -58,6 +58,7 @@ extends SegmentTermDocs { this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time } + @Override final void seek(TermInfo ti, Term term) throws IOException { super.seek(ti, term); if (ti != null) @@ -69,6 +70,7 @@ extends SegmentTermDocs { needToLoadPayload = false; } + @Override public final void close() throws IOException { super.close(); if (proxStream != null) proxStream.close(); @@ -100,11 +102,13 @@ extends SegmentTermDocs { return delta; } + @Override protected final void skippingDoc() throws IOException { // we remember to skip a document lazily lazySkipProxCount += freq; } + @Override public final boolean next() throws IOException { // we remember to skip the remaining positions of the current // document lazily @@ -118,12 +122,14 @@ extends SegmentTermDocs { return false; } + @Override public final int read(final int[] docs, final int[] freqs) { throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); } /** Called by super.skipTo(). 
*/ + @Override protected void skipProx(long proxPointer, int payloadLength) throws IOException { // we save the pointer, we might have to skip there lazily lazySkipPointer = proxPointer; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java index adf0535390d..8205e73b972 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java @@ -67,15 +67,18 @@ public final class TermInfosReader { this.term = t; } + @Override public boolean equals(Object other) { CloneableTerm t = (CloneableTerm) other; return this.term.equals(t.term); } + @Override public int hashCode() { return term.hashCode(); } + @Override public Object clone() { return new CloneableTerm(term); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/package.html b/lucene/src/java/org/apache/lucene/index/codecs/preflex/package.html new file mode 100644 index 00000000000..c6c96c978c2 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/package.html @@ -0,0 +1,25 @@ + + + + + + + +Preflex codec: supports Lucene 3.x indexes (readonly) + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java index 19cf99a733c..0867425baa5 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java @@ -32,8 +32,8 @@ import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.standard.StandardCodec; @@ -89,7 +89,7 @@ public class PulsingCodec extends Codec { // Terms dict success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; } finally { @@ -132,13 +132,13 @@ public class PulsingCodec extends Codec { // Terms dict reader success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, state.fieldInfos, state.segmentInfo.name, - pulsingReader, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - StandardCodec.TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, state.fieldInfos, state.segmentInfo.name, + pulsingReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -155,7 +155,7 @@ public class PulsingCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, String id, Set files) throws IOException { 
StandardPostingsReader.files(dir, segmentInfo, id, files); - PrefixCodedTermsReader.files(dir, segmentInfo, id, files); + BlockTermsReader.files(dir, segmentInfo, id, files); FixedGapTermsIndexReader.files(dir, segmentInfo, id, files); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java index 4914b36059a..6adab4d9f19 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java @@ -19,14 +19,15 @@ package org.apache.lucene.index.codecs.pulsing; import java.io.IOException; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Document; -import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Position; +import org.apache.lucene.index.codecs.BlockTermState; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -43,7 +44,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { // Fallback reader for non-pulsed terms: final PostingsReaderBase wrappedPostingsReader; - int maxPulsingDocFreq; + int maxPositions; public PulsingPostingsReaderImpl(PostingsReaderBase wrappedPostingsReader) throws IOException { this.wrappedPostingsReader = wrappedPostingsReader; @@ -53,145 +54,139 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { public void init(IndexInput termsIn) throws IOException { CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC, PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START); - maxPulsingDocFreq = termsIn.readVInt(); + maxPositions = termsIn.readVInt(); wrappedPostingsReader.init(termsIn); } - private static class PulsingTermState extends TermState { - private Document docs[]; - private TermState wrappedTermState; - private boolean pendingIndexTerm; + private static class PulsingTermState extends BlockTermState { + private byte[] postings; + private int postingsSize; // -1 if this term was not inlined + private BlockTermState wrappedTermState; + ByteArrayDataInput inlinedBytesReader; + private byte[] inlinedBytes; + + @Override public Object clone() { PulsingTermState clone; clone = (PulsingTermState) super.clone(); - clone.docs = docs.clone(); - for(int i=0;i>>1; - if ((code & 1) != 0) { - doc.numPositions = 1; - } else { - doc.numPositions = termsIn.readVInt(); - } - - if (doc.numPositions > doc.positions.length) { - doc.reallocPositions(doc.numPositions); - } - - int position = 0; - int payloadLength = -1; - - for(int j=0;j>> 1; - if ((code2 & 1) != 0) { - payloadLength = termsIn.readVInt(); - } - - if (payloadLength > 0) { - if (pos.payload == null) { - pos.payload = new BytesRef(); - pos.payload.bytes = new byte[payloadLength]; - } else if (payloadLength > pos.payload.bytes.length) { - pos.payload.grow(payloadLength); - } - pos.payload.length = payloadLength; - 
termsIn.readBytes(pos.payload.bytes, 0, payloadLength); - } else if (pos.payload != null) { - pos.payload.length = 0; - } - } else { - position += code2; - } - pos.pos = position; - } - } - doc.docID = docID; + // Inlined into terms dict -- just read the byte[] blob in, + // but don't decode it now (we only decode when a DocsEnum + // or D&PEnum is pulled): + termState.postingsSize = termState.inlinedBytesReader.readVInt(); + if (termState.postings == null || termState.postings.length < termState.postingsSize) { + termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)]; } + // TODO: sort of silly to copy from one big byte[] + // (the blob holding all inlined terms' blobs for + // current term block) into another byte[] (just the + // blob for this term)... + termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize); } else { + //System.out.println(" not inlined"); + termState.postingsSize = -1; + // TODO: should we do full copyFrom? much heavier...? termState.wrappedTermState.docFreq = termState.docFreq; - wrappedPostingsReader.readTerm(termsIn, fieldInfo, termState.wrappedTermState, termState.pendingIndexTerm); - termState.pendingIndexTerm = false; + termState.wrappedTermState.totalTermFreq = termState.totalTermFreq; + wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState); + termState.wrappedTermState.termCount++; } } // TODO: we could actually reuse, by having TL that // holds the last wrapped reuse, and vice-versa @Override - public DocsEnum docs(FieldInfo field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { PulsingTermState termState = (PulsingTermState) _termState; - if (termState.docFreq <= maxPulsingDocFreq) { + if (termState.postingsSize != -1) { + PulsingDocsEnum postings; if (reuse instanceof PulsingDocsEnum) { - return ((PulsingDocsEnum) reuse).reset(skipDocs, termState); + postings = (PulsingDocsEnum) reuse; + if (!postings.canReuse(field)) { + postings = new PulsingDocsEnum(field); + } } else { - PulsingDocsEnum docsEnum = new PulsingDocsEnum(); - return docsEnum.reset(skipDocs, termState); + postings = new PulsingDocsEnum(field); } + return postings.reset(skipDocs, termState); } else { + // TODO: not great that we lose reuse of PulsingDocsEnum in this case: if (reuse instanceof PulsingDocsEnum) { return wrappedPostingsReader.docs(field, termState.wrappedTermState, skipDocs, null); } else { @@ -202,15 +197,26 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { // TODO: -- not great that we can't always reuse @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { - PulsingTermState termState = (PulsingTermState) _termState; - if (termState.docFreq <= maxPulsingDocFreq) { + public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + if (field.omitTermFreqAndPositions) { + return null; + } + //System.out.println("D&P: field=" + field.name); + + final PulsingTermState termState = (PulsingTermState) _termState; + + if (termState.postingsSize != -1) { + PulsingDocsAndPositionsEnum postings; if (reuse instanceof PulsingDocsAndPositionsEnum) { - return ((PulsingDocsAndPositionsEnum) reuse).reset(skipDocs, termState); + postings = (PulsingDocsAndPositionsEnum) 
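The new nextTerm() path above reads each inlined term as a VInt length followed by a raw byte[] blob, deferring all decoding until a DocsEnum is pulled. As a rough sketch of that length-prefixed layout (plain Java, not the Lucene ByteArrayDataInput API):

import java.io.ByteArrayOutputStream;

// Sketch of the [VInt length][raw bytes] layout used for inlined postings;
// the blob is copied out per term and only decoded lazily. Values are made up.
final class VIntBlobDemo {

  static void writeVInt(ByteArrayOutputStream out, int value) {
    while ((value & ~0x7F) != 0) {
      out.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    out.write(value);
  }

  static int pos; // simple cursor over the block bytes

  static int readVInt(byte[] buf) {
    int b = buf[pos++] & 0xFF;
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = buf[pos++] & 0xFF;
      value |= (b & 0x7F) << shift;
    }
    return value;
  }

  public static void main(String[] args) {
    ByteArrayOutputStream block = new ByteArrayOutputStream();
    byte[] postings = {1, 2, 3, 4, 5};           // pretend: one term's inlined postings
    writeVInt(block, postings.length);           // length prefix
    block.write(postings, 0, postings.length);   // then the blob itself

    byte[] bytes = block.toByteArray();
    pos = 0;
    int len = readVInt(bytes);                   // nextTerm(): read the size...
    byte[] slice = new byte[len];
    System.arraycopy(bytes, pos, slice, 0, len); // ...then copy this term's blob
    System.out.println("inlined postings size = " + len);
  }
}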
reuse; + if (!postings.canReuse(field)) { + postings = new PulsingDocsAndPositionsEnum(field); + } } else { - PulsingDocsAndPositionsEnum postingsEnum = new PulsingDocsAndPositionsEnum(); - return postingsEnum.reset(skipDocs, termState); + postings = new PulsingDocsAndPositionsEnum(field); } + + return postings.reset(skipDocs, termState); } else { if (reuse instanceof PulsingDocsAndPositionsEnum) { return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, skipDocs, null); @@ -220,63 +226,90 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { } } - static class PulsingDocsEnum extends DocsEnum { - private int nextRead; + private static class PulsingDocsEnum extends DocsEnum { + private final ByteArrayDataInput postings = new ByteArrayDataInput(null); + private final boolean omitTF; + private final boolean storePayloads; private Bits skipDocs; - private Document doc; - private PulsingTermState state; + private int docID; + private int freq; - public void close() {} + public PulsingDocsEnum(FieldInfo fieldInfo) { + omitTF = fieldInfo.omitTermFreqAndPositions; + storePayloads = fieldInfo.storePayloads; + } - PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) { - // TODO: -- not great we have to clone here -- - // merging is wasteful; TermRangeQuery too - state = (PulsingTermState) termState.clone(); + public PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) { + //System.out.println("PR docsEnum termState=" + termState + " docFreq=" + termState.docFreq); + assert termState.postingsSize != -1; + final byte[] bytes = new byte[termState.postingsSize]; + System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize); + postings.reset(bytes); + docID = 0; + freq = 1; this.skipDocs = skipDocs; - nextRead = 0; return this; } - @Override - public int nextDoc() { - while(true) { - if (nextRead >= state.docFreq) { - return NO_MORE_DOCS; - } else { - doc = state.docs[nextRead++]; - if (skipDocs == null || !skipDocs.get(doc.docID)) { - return doc.docID; - } - } - } + boolean canReuse(FieldInfo fieldInfo) { + return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads; } @Override - public int read() { - int i=0; - // TODO: -- ob1? 
- initBulkResult(); - final int[] docs = bulkResult.docs.ints; - final int[] freqs = bulkResult.freqs.ints; - while(nextRead < state.docFreq) { - doc = state.docs[nextRead++]; - if (skipDocs == null || !skipDocs.get(doc.docID)) { - docs[i] = doc.docID; - freqs[i] = doc.numPositions; - i++; + public int nextDoc() throws IOException { + //System.out.println("PR nextDoc this= "+ this); + while(true) { + if (postings.eof()) { + //System.out.println("PR END"); + return docID = NO_MORE_DOCS; + } + + final int code = postings.readVInt(); + if (omitTF) { + docID += code; + } else { + docID += code >>> 1; // shift off low bit + if ((code & 1) != 0) { // if low bit is set + freq = 1; // freq is one + } else { + freq = postings.readVInt(); // else read freq + } + + // Skip positions + if (storePayloads) { + int payloadLength = -1; + for(int pos=0;pos>> 1; // shift off low bit + if ((code & 1) != 0) { // if low bit is set + freq = 1; // freq is one } else { - doc = state.docs[nextRead++]; - if (skipDocs == null || !skipDocs.get(doc.docID)) { - nextPosRead = 0; - return doc.docID; - } + freq = postings.readVInt(); // else read freq + } + posPending = freq; + + if (skipDocs == null || !skipDocs.get(docID)) { + //System.out.println(" return docID=" + docID + " freq=" + freq); + position = 0; + return docID; } } } @Override public int freq() { - return doc.numPositions; + return freq; } @Override public int docID() { - return doc.docID; + return docID; } @Override @@ -347,26 +406,72 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase { return doc; } } - return NO_MORE_DOCS; + return docID = NO_MORE_DOCS; } @Override - public int nextPosition() { - assert nextPosRead < doc.numPositions; - pos = doc.positions[nextPosRead++]; - payloadRetrieved = false; - return pos.pos; + public int nextPosition() throws IOException { + //System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq); + + assert posPending > 0; + posPending--; + + if (storePayloads) { + if (!payloadRetrieved) { + //System.out.println("PR skip payload=" + payloadLength); + postings.skipBytes(payloadLength); + } + final int code = postings.readVInt(); + //System.out.println("PR code=" + code); + if ((code & 1) != 0) { + payloadLength = postings.readVInt(); + //System.out.println("PR new payload len=" + payloadLength); + } + position += code >> 1; + payloadRetrieved = false; + } else { + position += postings.readVInt(); + } + + //System.out.println("PR d&p nextPos return pos=" + position + " this=" + this); + return position; + } + + private void skipPositions() throws IOException { + while(posPending != 0) { + nextPosition(); + } + if (storePayloads && !payloadRetrieved) { + //System.out.println(" skip payload len=" + payloadLength); + postings.skipBytes(payloadLength); + payloadRetrieved = true; + } } @Override public boolean hasPayload() { - return !payloadRetrieved && pos.payload != null && pos.payload.length > 0; + return storePayloads && !payloadRetrieved && payloadLength > 0; } @Override - public BytesRef getPayload() { + public BytesRef getPayload() throws IOException { + //System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this); + if (payloadRetrieved) { + throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); + } payloadRetrieved = true; - return pos.payload; + if (payloadLength > 0) { + if (payload == null) { + payload = new BytesRef(payloadLength); + } else { + payload.grow(payloadLength); + } + 
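The rewritten PulsingDocsEnum decodes doc deltas where, when freqs are stored, the delta is left-shifted one bit and the low bit flags "freq == 1". A small standalone sketch of that encoding and its decoding loop (not the real enum, just the arithmetic):

import java.util.ArrayList;
import java.util.List;

// Sketch of the doc-delta encoding the pulsing enum reads: with freqs enabled
// each entry is (delta << 1 | 1) when freq == 1, otherwise (delta << 1)
// followed by the freq as a separate value.
final class PulsedDeltaDemo {

  static List<Integer> encode(int[] docs, int[] freqs) {
    List<Integer> out = new ArrayList<>();
    int lastDoc = 0;
    for (int i = 0; i < docs.length; i++) {
      int delta = docs[i] - lastDoc;
      lastDoc = docs[i];
      if (freqs[i] == 1) {
        out.add((delta << 1) | 1);   // low bit set: freq is implicitly 1
      } else {
        out.add(delta << 1);         // low bit clear: freq follows
        out.add(freqs[i]);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    int[] docs = {3, 7, 8};
    int[] freqs = {1, 4, 1};
    List<Integer> stream = encode(docs, freqs);

    int doc = 0, i = 0;
    while (i < stream.size()) {
      int code = stream.get(i++);
      doc += code >>> 1;                               // shift off the low bit
      int freq = ((code & 1) != 0) ? 1 : stream.get(i++);
      System.out.println("doc=" + doc + " freq=" + freq);
    }
  }
}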
postings.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + return payload; + } else { + return null; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java index f18637d29ad..35b2a3d0278 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java @@ -20,17 +20,17 @@ package org.apache.lucene.index.codecs.pulsing; import java.io.IOException; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.util.CodecUtil; import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.CodecUtil; -// TODO: we now pulse entirely according to docFreq of the -// term; it might be better to eg pulse by "net bytes used" -// so that a term that has only 1 doc but zillions of -// positions would not be inlined. Though this is +// TODO: we now inline based on total TF of the term, +// but it might be better to inline by "net bytes used" +// so that a term that has only 1 posting but a huge +// payload would not be inlined. Though this is // presumably rare in practice... /** @lucene.experimental */ @@ -44,86 +44,42 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { final static int VERSION_CURRENT = VERSION_START; - IndexOutput termsOut; + private IndexOutput termsOut; - boolean omitTF; - boolean storePayloads; + private boolean omitTF; + private boolean storePayloads; - // Starts a new term - FieldInfo fieldInfo; + // one entry per position + private final Position[] pending; + private int pendingCount = 0; // -1 once we've hit too many positions + private Position currentDoc; // first Position entry of current doc - /** @lucene.experimental */ - public static class Document { - int docID; - int termDocFreq; - int numPositions; - Position[] positions; - Document() { - positions = new Position[1]; - positions[0] = new Position(); - } - - @Override - public Object clone() { - Document doc = new Document(); - doc.docID = docID; - doc.termDocFreq = termDocFreq; - doc.numPositions = numPositions; - doc.positions = new Position[positions.length]; - for(int i = 0; i < positions.length; i++) { - doc.positions[i] = (Position) positions[i].clone(); - } - - return doc; - } - - void reallocPositions(int minSize) { - final Position[] newArray = new Position[ArrayUtil.oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(positions, 0, newArray, 0, positions.length); - for(int i=positions.length;i maxPulsingDocFreq docs - - static class Position { + private static final class Position { BytesRef payload; + int termFreq; // only incremented on first position for a given doc int pos; - - @Override - public Object clone() { - Position position = new Position(); - position.pos = pos; - if (payload != null) { - position.payload = new BytesRef(payload); - } - return position; - } + int docID; } // TODO: -- lazy init this? ie, if every single term - // was pulsed then we never need to use this fallback? 
- // Fallback writer for non-pulsed terms: + // was inlined (eg for a "primary key" field) then we + // never need to use this fallback? Fallback writer for + // non-inlined terms: final PostingsWriterBase wrappedPostingsWriter; - /** If docFreq <= maxPulsingDocFreq, its postings are + /** If the total number of positions (summed across all docs + * for this term) is <= maxPositions, then the postings are * inlined into terms dict */ - public PulsingPostingsWriterImpl(int maxPulsingDocFreq, PostingsWriterBase wrappedPostingsWriter) throws IOException { + public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException { super(); - pendingDocs = new Document[maxPulsingDocFreq]; - for(int i=0;i= the cutoff: this.wrappedPostingsWriter = wrappedPostingsWriter; } @@ -131,14 +87,14 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { public void start(IndexOutput termsOut) throws IOException { this.termsOut = termsOut; CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); - termsOut.writeVInt(pendingDocs.length); + termsOut.writeVInt(pending.length); // encode maxPositions in header wrappedPostingsWriter.start(termsOut); } @Override public void startTerm() { - assert pendingDocCount == 0; - pulsed = false; + //System.out.println("PW startTerm"); + assert pendingCount == 0; } // TODO: -- should we NOT reuse across fields? would @@ -148,73 +104,56 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { // our parent calls setField whenever the field changes @Override public void setField(FieldInfo fieldInfo) { - this.fieldInfo = fieldInfo; omitTF = fieldInfo.omitTermFreqAndPositions; + //System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF); storePayloads = fieldInfo.storePayloads; wrappedPostingsWriter.setField(fieldInfo); } @Override public void startDoc(int docID, int termDocFreq) throws IOException { - assert docID >= 0: "got docID=" + docID; - - if (!pulsed && pendingDocCount == pendingDocs.length) { - - // OK we just crossed the threshold, this term should - // now be written with our wrapped codec: - wrappedPostingsWriter.startTerm(); - - // Flush all buffered docs - for(int i=0;i currentDoc.positions.length) { - currentDoc.reallocPositions(termDocFreq); - } - currentDoc.numPositions = 0; } } @Override public void addPosition(int position, BytesRef payload) throws IOException { - if (pulsed) { + + //System.out.println("PW pos=" + position + " payload=" + (payload == null ? 
"null" : payload.length + " bytes")); + if (pendingCount == pending.length) { + push(); + } + + if (pendingCount == -1) { + // We've already seen too many docs for this term -- + // just forward to our fallback writer wrappedPostingsWriter.addPosition(position, payload); } else { - // just buffer up - Position pos = currentDoc.positions[currentDoc.numPositions++]; + // buffer up + final Position pos = pending[pendingCount++]; pos.pos = position; + pos.docID = currentDoc.docID; if (payload != null && payload.length > 0) { if (pos.payload == null) { pos.payload = new BytesRef(payload); @@ -229,86 +168,146 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase { @Override public void finishDoc() throws IOException { - assert omitTF || currentDoc.numPositions == currentDoc.termDocFreq; - if (pulsed) { + //System.out.println("PW finishDoc"); + if (pendingCount == -1) { wrappedPostingsWriter.finishDoc(); } } - boolean pendingIsIndexTerm; - - int pulsedCount; - int nonPulsedCount; + private final RAMOutputStream buffer = new RAMOutputStream(); + private final RAMOutputStream buffer2 = new RAMOutputStream(); /** Called when we are done adding docs to this term */ @Override - public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { + public void finishTerm(TermStats stats) throws IOException { + //System.out.println("PW finishTerm docCount=" + stats.docFreq); - assert docCount > 0; + assert pendingCount > 0 || pendingCount == -1; - pendingIsIndexTerm |= isIndexTerm; - - if (pulsed) { - wrappedPostingsWriter.finishTerm(docCount, pendingIsIndexTerm); - pendingIsIndexTerm = false; - pulsedCount++; + if (pendingCount == -1) { + wrappedPostingsWriter.finishTerm(stats); } else { - nonPulsedCount++; - // OK, there were few enough occurrences for this + + // There were few enough total occurrences for this // term, so we fully inline our postings data into // terms dict, now: - int lastDocID = 0; - for(int i=0;i + + + + + + +Pulsing Codec: inlines low frequency terms' postings into terms dictionary. + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java index 2ab0f46a391..631476df0ba 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java @@ -17,11 +17,11 @@ package org.apache.lucene.index.codecs.sep; * limitations under the License. */ -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IntsRef; - -import java.io.IOException; import java.io.Closeable; +import java.io.IOException; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.IntsRef; /** Defines basic API for writing ints to an IndexOutput. * IntBlockCodec interacts with this API. @see @@ -39,7 +39,7 @@ public abstract class IntIndexInput implements Closeable { // TODO: -- can we simplify this? 
public abstract static class Index { - public abstract void read(IndexInput indexIn, boolean absolute) throws IOException; + public abstract void read(DataInput indexIn, boolean absolute) throws IOException; public abstract void read(IntIndexInput.Reader indexIn, boolean absolute) throws IOException; @@ -48,6 +48,7 @@ public abstract class IntIndexInput implements Closeable { public abstract void set(Index other); + @Override public abstract Object clone(); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java index 342d2fa8bcd..b693db361c9 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java @@ -20,15 +20,18 @@ package org.apache.lucene.index.codecs.sep; import java.io.IOException; import java.util.Collection; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -129,44 +132,120 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { } } - private static class SepTermState extends TermState { + private static final class SepTermState extends BlockTermState { // We store only the seek point to the docs file because // the rest of the info (freqIndex, posIndex, etc.) 
is // stored in the docs file: IntIndexInput.Index docIndex; + IntIndexInput.Index posIndex; + IntIndexInput.Index freqIndex; + long payloadFP; + long skipFP; + // Only used for "primary" term state; these are never + // copied on clone: + byte[] bytes; + ByteArrayDataInput bytesReader; + + @Override public Object clone() { SepTermState other = (SepTermState) super.clone(); other.docIndex = (IntIndexInput.Index) docIndex.clone(); + if (freqIndex != null) { + other.freqIndex = (IntIndexInput.Index) freqIndex.clone(); + } + if (posIndex != null) { + other.posIndex = (IntIndexInput.Index) posIndex.clone(); + } return other; } - public void copy(TermState _other) { - super.copy(_other); + @Override + public void copyFrom(TermState _other) { + super.copyFrom(_other); SepTermState other = (SepTermState) _other; docIndex.set(other.docIndex); + if (freqIndex != null && other.freqIndex != null) { + freqIndex.set(other.freqIndex); + } + if (posIndex != null && other.posIndex != null) { + posIndex.set(other.posIndex); + } + payloadFP = other.payloadFP; + skipFP = other.skipFP; } @Override public String toString() { - return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord + " docIndex=" + docIndex; + return super.toString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex + " payloadFP=" + payloadFP + " skipFP=" + skipFP; } } @Override - public TermState newTermState() throws IOException { - final SepTermState state = new SepTermState(); + public BlockTermState newTermState() throws IOException { + final SepTermState state = new SepTermState(); state.docIndex = docIn.index(); + if (freqIn != null) { + state.freqIndex = freqIn.index(); + } + if (posIn != null) { + state.posIndex = posIn.index(); + } return state; } @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException { - ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm); + public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException { + final SepTermState termState = (SepTermState) _termState; + final int len = termsIn.readVInt(); + //System.out.println("SepR.readTermsBlock len=" + len); + if (termState.bytes == null) { + termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; + termState.bytesReader = new ByteArrayDataInput(termState.bytes); + } else if (termState.bytes.length < len) { + termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; + } + termState.bytesReader.reset(termState.bytes, 0, len); + termsIn.readBytes(termState.bytes, 0, len); } @Override - public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException { + final SepTermState termState = (SepTermState) _termState; + //System.out.println("SepR.nextTerm termCount=" + termState.termCount); + //System.out.println(" docFreq=" + termState.docFreq); + final boolean isFirstTerm = termState.termCount == 0; + termState.docIndex.read(termState.bytesReader, isFirstTerm); + //System.out.println(" docIndex=" + termState.docIndex); + if (!fieldInfo.omitTermFreqAndPositions) { + termState.freqIndex.read(termState.bytesReader, isFirstTerm); + //System.out.println(" freqIndex=" + termState.freqIndex); + termState.posIndex.read(termState.bytesReader, isFirstTerm); + //System.out.println(" posIndex=" + termState.posIndex); + if (fieldInfo.storePayloads) { + if 
(isFirstTerm) { + termState.payloadFP = termState.bytesReader.readVLong(); + } else { + termState.payloadFP += termState.bytesReader.readVLong(); + } + //System.out.println(" payloadFP=" + termState.payloadFP); + } + } + if (termState.docFreq >= skipInterval) { + //System.out.println(" readSkip @ " + termState.bytesReader.pos); + if (isFirstTerm) { + termState.skipFP = termState.bytesReader.readVLong(); + } else { + termState.skipFP += termState.bytesReader.readVLong(); + } + //System.out.println(" skipFP=" + termState.skipFP); + } else if (isFirstTerm) { + termState.skipFP = termState.bytesReader.readVLong(); + } + } + + @Override + public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { final SepTermState termState = (SepTermState) _termState; SepDocsEnum docsEnum; if (reuse == null || !(reuse instanceof SepDocsEnum)) { @@ -185,7 +264,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; final SepTermState termState = (SepTermState) _termState; SepDocsAndPositionsEnum postingsEnum; @@ -217,7 +296,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { private Bits skipDocs; private final IntIndexInput.Reader docReader; private final IntIndexInput.Reader freqReader; - private long skipOffset; + private long skipFP; private final IntIndexInput.Index docIndex; private final IntIndexInput.Index freqIndex; @@ -258,18 +337,15 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { docIndex.seek(docReader); if (!omitTF) { - freqIndex.read(docReader, true); + freqIndex.set(termState.freqIndex); freqIndex.seek(freqReader); - - posIndex.read(docReader, true); - // skip payload offset - docReader.readVLong(); } else { freq = 1; } - skipOffset = docReader.readVLong(); docFreq = termState.docFreq; + // NOTE: unused if docFreq < skipInterval: + skipFP = termState.skipFP; count = 0; doc = 0; skipped = false; @@ -288,9 +364,11 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { count++; // Decode next doc + //System.out.println("decode docDelta:"); doc += docReader.next(); if (!omitTF) { + //System.out.println("decode freq:"); freq = freqReader.next(); } @@ -298,13 +376,13 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { break; } } - return doc; } @Override public int read() throws IOException { // TODO: -- switch to bulk read api in IntIndexInput + //System.out.println("sepdocs read"); final int[] docs = bulkResult.docs.ints; final int[] freqs = bulkResult.freqs.ints; int i = 0; @@ -312,14 +390,17 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { while (i < length && count < docFreq) { count++; // manually inlined call to next() for speed + //System.out.println("decode doc"); doc += docReader.next(); if (!omitTF) { + //System.out.println("decode freq"); freq = freqReader.next(); } if (skipDocs == null || !skipDocs.get(doc)) { docs[i] = doc; freqs[i] = freq; + //System.out.println(" docs[" + i + "]=" + doc + " count=" + count + " dF=" + docFreq); i++; } } @@ -359,7 +440,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { if (!skipped) { // We haven't yet skipped for this 
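The nextTerm() decoding above relies on the first term in each block carrying absolute file pointers while later terms carry deltas (payloadFP += ..., skipFP += ...). A small sketch of that encode/decode round trip, with made-up pointer values:

import java.util.ArrayList;
import java.util.List;

// Sketch of the "absolute for the first term in a block, delta afterwards"
// pointer encoding that the sep reader's nextTerm() undoes. Values and names
// are illustrative, not the real on-disk format.
final class BlockDeltaPointersDemo {

  static List<Long> encode(long[] filePointers) {
    List<Long> out = new ArrayList<>();
    long last = 0;
    for (int i = 0; i < filePointers.length; i++) {
      boolean isFirstTerm = (i == 0);
      out.add(isFirstTerm ? filePointers[i] : filePointers[i] - last);
      last = filePointers[i];
    }
    return out;
  }

  public static void main(String[] args) {
    long[] skipFPs = {1024, 1100, 1460, 2048};    // per-term skip file pointers
    List<Long> encoded = encode(skipFPs);
    System.out.println("encoded = " + encoded);   // [1024, 76, 360, 588]

    long skipFP = 0;
    for (int term = 0; term < encoded.size(); term++) {
      if (term == 0) {
        skipFP = encoded.get(term);    // absolute value for the first term
      } else {
        skipFP += encoded.get(term);   // delta for every later term in the block
      }
      System.out.println("term " + term + " skipFP=" + skipFP);
    }
  }
}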
posting - skipper.init(skipOffset, + skipper.init(skipFP, docIndex, freqIndex, posIndex, @@ -409,14 +490,14 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { private final IntIndexInput.Reader freqReader; private final IntIndexInput.Reader posReader; private final IndexInput payloadIn; - private long skipOffset; + private long skipFP; private final IntIndexInput.Index docIndex; private final IntIndexInput.Index freqIndex; private final IntIndexInput.Index posIndex; private final IntIndexInput startDocIn; - private long payloadOffset; + private long payloadFP; private int pendingPosCount; private int position; @@ -442,21 +523,26 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException { this.skipDocs = skipDocs; storePayloads = fieldInfo.storePayloads; + //System.out.println("Sep D&P init"); // TODO: can't we only do this if consumer // skipped consuming the previous docs? docIndex.set(termState.docIndex); docIndex.seek(docReader); + //System.out.println(" docIndex=" + docIndex); - freqIndex.read(docReader, true); + freqIndex.set(termState.freqIndex); freqIndex.seek(freqReader); + //System.out.println(" freqIndex=" + freqIndex); - posIndex.read(docReader, true); + posIndex.set(termState.posIndex); + //System.out.println(" posIndex=" + posIndex); posSeekPending = true; payloadPending = false; - payloadOffset = docReader.readVLong(); - skipOffset = docReader.readVLong(); + payloadFP = termState.payloadFP; + skipFP = termState.skipFP; + //System.out.println(" skipFP=" + skipFP); docFreq = termState.docFreq; count = 0; @@ -482,8 +568,10 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { // freq=1 case? // Decode next doc + //System.out.println(" sep d&p read doc"); doc += docReader.next(); - + + //System.out.println(" sep d&p read freq"); freq = freqReader.next(); pendingPosCount += freq; @@ -509,6 +597,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { @Override public int advance(int target) throws IOException { + //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this); // TODO: jump right to next() if target is < X away // from where we are now? @@ -519,6 +608,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { // skip data if (skipper == null) { + //System.out.println(" create skipper"); // This DocsEnum has never done any skipping skipper = new SepSkipListReader((IndexInput) skipIn.clone(), freqIn, @@ -528,46 +618,54 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { } if (!skipped) { + //System.out.println(" init skip data skipFP=" + skipFP); // We haven't yet skipped for this posting - skipper.init(skipOffset, + skipper.init(skipFP, docIndex, freqIndex, posIndex, - payloadOffset, + payloadFP, docFreq, storePayloads); skipped = true; } - final int newCount = skipper.skipTo(target); + //System.out.println(" skip newCount=" + newCount + " vs " + count); if (newCount > count) { // Skipper did move skipper.getFreqIndex().seek(freqReader); skipper.getDocIndex().seek(docReader); - //skipper.getPosIndex().seek(posReader); + // NOTE: don't seek pos here; do it lazily + // instead. Eg a PhraseQuery may skip to many + // docs before finally asking for positions... 
posIndex.set(skipper.getPosIndex()); posSeekPending = true; count = newCount; doc = skipper.getDoc(); + //System.out.println(" moved to doc=" + doc); //payloadIn.seek(skipper.getPayloadPointer()); - payloadOffset = skipper.getPayloadPointer(); + payloadFP = skipper.getPayloadPointer(); pendingPosCount = 0; pendingPayloadBytes = 0; payloadPending = false; payloadLength = skipper.getPayloadLength(); + //System.out.println(" move payloadLen=" + payloadLength); } } // Now, linear scan for the rest: do { if (nextDoc() == NO_MORE_DOCS) { + //System.out.println(" advance nextDoc=END"); return NO_MORE_DOCS; } + //System.out.println(" advance nextDoc=" + doc); } while (target > doc); + //System.out.println(" return doc=" + doc); return doc; } @@ -575,7 +673,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { public int nextPosition() throws IOException { if (posSeekPending) { posIndex.seek(posReader); - payloadIn.seek(payloadOffset); + payloadIn.seek(payloadFP); posSeekPending = false; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java index 6be97d22f9d..9e9b9966808 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java @@ -25,7 +25,9 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -68,8 +70,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { boolean storePayloads; boolean omitTF; - // Starts a new term - long lastSkipStart; + long lastSkipFP; FieldInfo fieldInfo; @@ -79,7 +80,10 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { long lastPayloadStart; int lastDocID; int df; - private boolean firstDoc; + private int pendingTermCount; + + // Holds pending byte[] blob for the current terms block + private final RAMOutputStream indexBytesWriter = new RAMOutputStream(); public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException { super(); @@ -143,13 +147,9 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { payloadStart = payloadOut.getFilePointer(); lastPayloadLength = -1; } - firstDoc = true; skipListWriter.resetSkip(docIndex, freqIndex, posIndex); } - // TODO: -- should we NOT reuse across fields? would - // be cleaner - // Currently, this instance is re-used across fields, so // our parent calls setField whenever the field changes @Override @@ -160,27 +160,13 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { storePayloads = !omitTF && fieldInfo.storePayloads; } - /** Adds a new doc in this term. If this returns null * then we just skip consuming positions/payloads. */ @Override public void startDoc(int docID, int termDocFreq) throws IOException { - if (firstDoc) { - // TODO: we are writing absolute file pointers below, - // which is wasteful. It'd be better compression to - // write the "baseline" into each indexed term, then - // write only the delta here. 
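The comment above notes that the position index is deliberately not seeked after a skip; instead a pending flag defers the seek until positions are actually requested. A tiny model of that lazy-seek pattern (placeholder values, not the real enum):

// Sketch of the lazy position-seek pattern: after a skip, only remember the
// new position pointer and set a pending flag; the actual seek happens the
// first time nextPosition() is called. A query that skips across many docs
// without reading positions never pays for the seeks.
final class LazyPosSeekDemo {

  private long pendingPosFP = -1;   // where positions for the current doc live
  private boolean posSeekPending;
  private long currentPosFP = 0;    // stands in for the real file position

  void advanceTo(long posFP) {
    pendingPosFP = posFP;
    posSeekPending = true;          // note it, but do not seek yet
  }

  int nextPosition() {
    if (posSeekPending) {
      currentPosFP = pendingPosFP;  // the deferred seek happens here
      posSeekPending = false;
      System.out.println("seeked positions to fp=" + currentPosFP);
    }
    return 42;                      // placeholder decoded position
  }

  public static void main(String[] args) {
    LazyPosSeekDemo e = new LazyPosSeekDemo();
    e.advanceTo(1_000);             // skip; no seek performed
    e.advanceTo(9_000);             // skip again; still no seek
    System.out.println("pos=" + e.nextPosition());  // only now do we seek once
  }
}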
- if (!omitTF) { - freqIndex.write(docOut, true); - posIndex.write(docOut, true); - docOut.writeVLong(payloadStart); - } - docOut.writeVLong(skipOut.getFilePointer()); - firstDoc = false; - } - final int delta = docID - lastDocID; + //System.out.println("SepW startDoc: write doc=" + docID + " delta=" + delta); if (docID < 0 || (df > 0 && delta <= 0)) { throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )"); @@ -189,6 +175,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { if ((++df % skipInterval) == 0) { // TODO: -- awkward we have to make these two // separate calls to skipper + //System.out.println(" buffer skip lastDocID=" + lastDocID); skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); skipListWriter.bufferSkip(df); } @@ -196,10 +183,20 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { lastDocID = docID; docOut.write(delta); if (!omitTF) { + //System.out.println(" sepw startDoc: write freq=" + termDocFreq); freqOut.write(termDocFreq); } } + @Override + public void flushTermsBlock() throws IOException { + //System.out.println("SepW.flushTermsBlock: pendingTermCount=" + pendingTermCount + " bytesUsed=" + indexBytesWriter.getFilePointer()); + termsOut.writeVLong((int) indexBytesWriter.getFilePointer()); + indexBytesWriter.writeTo(termsOut); + indexBytesWriter.reset(); + pendingTermCount = 0; + } + /** Add a new position & payload */ @Override public void addPosition(int position, BytesRef payload) throws IOException { @@ -239,20 +236,57 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { /** Called when we are done adding docs to this term */ @Override - public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { - + public void finishTerm(TermStats stats) throws IOException { // TODO: -- wasteful we are counting this in two places? 
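The new flushTermsBlock() above accumulates each term's metadata in an in-memory buffer and writes it out once per terms block as a length plus bytes. A rough model using plain JDK streams as stand-ins for IndexOutput/RAMOutputStream:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Rough model of the writer side of the block metadata: finishTerm() appends
// this term's pointers to an in-memory buffer, flushTermsBlock() writes the
// buffer length plus its bytes into the terms dictionary and clears it.
final class TermsBlockBufferDemo {

  private final ByteArrayOutputStream indexBytes = new ByteArrayOutputStream();
  private final DataOutputStream indexWriter = new DataOutputStream(indexBytes);
  private int pendingTermCount;

  void finishTerm(long docFP) throws IOException {
    // the real patch writes an absolute pointer for the first term of the
    // block (pendingTermCount == 0) and deltas afterwards; omitted here
    indexWriter.writeLong(docFP);
    pendingTermCount++;
  }

  void flushTermsBlock(DataOutputStream termsOut) throws IOException {
    termsOut.writeInt(indexBytes.size());  // length prefix for the block blob
    indexBytes.writeTo(termsOut);          // then the buffered per-term metadata
    indexBytes.reset();
    pendingTermCount = 0;
  }

  public static void main(String[] args) throws IOException {
    TermsBlockBufferDemo demo = new TermsBlockBufferDemo();
    demo.finishTerm(100L);
    demo.finishTerm(220L);
    ByteArrayOutputStream termsDict = new ByteArrayOutputStream();
    demo.flushTermsBlock(new DataOutputStream(termsDict));
    System.out.println("terms block blob = " + termsDict.size() + " bytes");
  }
}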
- assert docCount > 0; - assert docCount == df; + assert stats.docFreq > 0; + assert stats.docFreq == df; - docIndex.write(termsOut, isIndexTerm); + final boolean isFirstTerm = pendingTermCount == 0; + //System.out.println("SepW.finishTerm: isFirstTerm=" + isFirstTerm); + + docIndex.write(indexBytesWriter, isFirstTerm); + //System.out.println(" docIndex=" + docIndex); + + if (!omitTF) { + freqIndex.write(indexBytesWriter, isFirstTerm); + //System.out.println(" freqIndex=" + freqIndex); + + posIndex.write(indexBytesWriter, isFirstTerm); + //System.out.println(" posIndex=" + posIndex); + if (storePayloads) { + if (isFirstTerm) { + indexBytesWriter.writeVLong(payloadStart); + } else { + indexBytesWriter.writeVLong(payloadStart - lastPayloadStart); + } + lastPayloadStart = payloadStart; + //System.out.println(" payloadFP=" + payloadStart); + } + } if (df >= skipInterval) { + //System.out.println(" skipFP=" + skipStart); + final long skipFP = skipOut.getFilePointer(); skipListWriter.writeSkip(skipOut); + //System.out.println(" writeSkip @ " + indexBytesWriter.getFilePointer()); + if (isFirstTerm) { + indexBytesWriter.writeVLong(skipFP); + } else { + indexBytesWriter.writeVLong(skipFP - lastSkipFP); + } + lastSkipFP = skipFP; + } else if (isFirstTerm) { + // TODO: this is somewhat wasteful; eg if no terms in + // this block will use skip data, we don't need to + // write this: + final long skipFP = skipOut.getFilePointer(); + indexBytesWriter.writeVLong(skipFP); + lastSkipFP = skipFP; } lastDocID = 0; df = 0; + pendingTermCount++; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/package.html b/lucene/src/java/org/apache/lucene/index/codecs/sep/package.html new file mode 100644 index 00000000000..b51d9102715 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/package.html @@ -0,0 +1,25 @@ + + + + + + + +Sep: base support for separate files (doc,frq,pos,skp,pyl) + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java index 890b1de029e..8e3427704b4 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java @@ -125,28 +125,32 @@ class SimpleTextFieldsReader extends FieldsProducer { private final IndexInput in; private final boolean omitTF; private int docFreq; + private long totalTermFreq; private long docsStart; private boolean ended; - private final BytesRefFSTEnum> fstEnum; + private final BytesRefFSTEnum>> fstEnum; - public SimpleTextTermsEnum(FST> fst, boolean omitTF) throws IOException { + public SimpleTextTermsEnum(FST>> fst, boolean omitTF) throws IOException { this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); this.omitTF = omitTF; - fstEnum = new BytesRefFSTEnum>(fst); + fstEnum = new BytesRefFSTEnum>>(fst); } + @Override public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException { //System.out.println("seek to text=" + text.utf8ToString()); - final BytesRefFSTEnum.InputOutput> result = fstEnum.seekCeil(text); + final BytesRefFSTEnum.InputOutput>> result = fstEnum.seekCeil(text); if (result == null) { //System.out.println(" end"); return SeekStatus.END; } else { //System.out.println(" got text=" + term.utf8ToString()); - PairOutputs.Pair pair = result.output; - docsStart = pair.output1; - docFreq = pair.output2.intValue(); + 
PairOutputs.Pair> pair1 = result.output; + PairOutputs.Pair pair2 = pair1.output2; + docsStart = pair1.output1; + docFreq = pair2.output1.intValue(); + totalTermFreq = pair2.output2; if (result.input.equals(text)) { //System.out.println(" match docsStart=" + docsStart); @@ -158,18 +162,16 @@ class SimpleTextFieldsReader extends FieldsProducer { } } - @Override - public void cacheCurrentTerm() { - } - @Override public BytesRef next() throws IOException { assert !ended; - final BytesRefFSTEnum.InputOutput> result = fstEnum.next(); + final BytesRefFSTEnum.InputOutput>> result = fstEnum.next(); if (result != null) { - final PairOutputs.Pair pair = result.output; - docsStart = pair.output1; - docFreq = pair.output2.intValue(); + PairOutputs.Pair> pair1 = result.output; + PairOutputs.Pair pair2 = pair1.output2; + docsStart = pair1.output1; + docFreq = pair2.output1.intValue(); + totalTermFreq = pair2.output2; return result.input; } else { return null; @@ -196,6 +198,11 @@ class SimpleTextFieldsReader extends FieldsProducer { return docFreq; } + @Override + public long totalTermFreq() { + return totalTermFreq; + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { SimpleTextDocsEnum docsEnum; @@ -221,7 +228,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } return docsAndPositionsEnum.reset(docsStart, skipDocs); } - + @Override public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); @@ -446,15 +453,14 @@ class SimpleTextFieldsReader extends FieldsProducer { } private class SimpleTextTerms extends Terms { - private final String field; private final long termsStart; private final boolean omitTF; - private FST> fst; - + private long sumTotalTermFreq; + private FST>> fst; + private int termCount; private final BytesRef scratch = new BytesRef(10); public SimpleTextTerms(String field, long termsStart) throws IOException { - this.field = StringHelper.intern(field); this.termsStart = termsStart; omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions; loadTerms(); @@ -462,24 +468,38 @@ class SimpleTextFieldsReader extends FieldsProducer { private void loadTerms() throws IOException { PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false); - Builder> b = new Builder>(FST.INPUT_TYPE.BYTE1, 0, 0, true, new PairOutputs(posIntOutputs, posIntOutputs)); + final Builder>> b; + b = new Builder>>(FST.INPUT_TYPE.BYTE1, + 0, + 0, + true, + new PairOutputs>(posIntOutputs, + new PairOutputs(posIntOutputs, posIntOutputs))); IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); in.seek(termsStart); final BytesRef lastTerm = new BytesRef(10); long lastDocsStart = -1; int docFreq = 0; + long totalTermFreq = 0; while(true) { readLine(in, scratch); if (scratch.equals(END) || scratch.startsWith(FIELD)) { if (lastDocsStart != -1) { - b.add(lastTerm, new PairOutputs.Pair(lastDocsStart, Long.valueOf(docFreq))); + b.add(lastTerm, new PairOutputs.Pair>(lastDocsStart, + new PairOutputs.Pair((long) docFreq, + posIntOutputs.get(totalTermFreq)))); + sumTotalTermFreq += totalTermFreq; } break; } else if (scratch.startsWith(DOC)) { docFreq++; + } else if (scratch.startsWith(POS)) { + totalTermFreq++; } else if (scratch.startsWith(TERM)) { if (lastDocsStart != -1) { - b.add(lastTerm, new PairOutputs.Pair(lastDocsStart, Long.valueOf(docFreq))); + b.add(lastTerm, new PairOutputs.Pair>(lastDocsStart, + new PairOutputs.Pair((long) docFreq, + posIntOutputs.get(totalTermFreq)))); } lastDocsStart = in.getFilePointer(); final int 
len = scratch.length - TERM.length; @@ -489,6 +509,9 @@ class SimpleTextFieldsReader extends FieldsProducer { System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len); lastTerm.length = len; docFreq = 0; + sumTotalTermFreq += totalTermFreq; + totalTermFreq = 0; + termCount++; } } fst = b.finish(); @@ -514,6 +537,16 @@ class SimpleTextFieldsReader extends FieldsProducer { public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public long getUniqueTermCount() { + return (long) termCount; + } + + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java index ae6338943e0..128da45c9b7 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.index.codecs.PostingsConsumer; +import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.IndexOutput; @@ -84,11 +85,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer { } @Override - public void finishTerm(BytesRef term, int numDocs) throws IOException { + public void finishTerm(BytesRef term, TermStats stats) throws IOException { } @Override - public void finish() throws IOException { + public void finish(long sumTotalTermFreq) throws IOException { } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/package.html b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/package.html new file mode 100644 index 00000000000..88aad683412 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/package.html @@ -0,0 +1,25 @@ + + + + + + + +Simpletext Codec: writes human readable postings. + + diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java index 384fe2a3bc5..f0af9ca2507 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java @@ -33,8 +33,8 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter; import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.store.Directory; /** Default codec. 
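The SimpleText reader now tracks totalTermFreq per term (one increment per POS line) and sums it into sumTotalTermFreq while loading terms. A simplified scan over an approximated line format shows the accumulation; the real simpletext layout differs in detail.

// Sketch of per-term statistics gathered while scanning a SimpleText-style
// listing: docFreq counts "doc" lines, totalTermFreq counts "pos" lines for
// the current term. The line format here is only an approximation.
final class SimpleTextStatsDemo {
  public static void main(String[] args) {
    String[] lines = {
        "term foo", "doc 0", "pos 3", "pos 9", "doc 4", "pos 1",
        "term bar", "doc 2", "pos 7",
        "END"
    };
    String term = null;
    int docFreq = 0;
    long totalTermFreq = 0, sumTotalTermFreq = 0;
    for (String line : lines) {
      if (line.startsWith("term ") || line.equals("END")) {
        if (term != null) {
          System.out.println(term + ": docFreq=" + docFreq
              + " totalTermFreq=" + totalTermFreq);
          sumTotalTermFreq += totalTermFreq;
        }
        term = line.equals("END") ? null : line.substring(5);
        docFreq = 0;
        totalTermFreq = 0;
      } else if (line.startsWith("doc ")) {
        docFreq++;
      } else if (line.startsWith("pos ")) {
        totalTermFreq++;
      }
    }
    System.out.println("sumTotalTermFreq=" + sumTotalTermFreq);
  }
}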
@@ -66,7 +66,7 @@ public class StandardCodec extends Codec { success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; } finally { @@ -103,15 +103,15 @@ public class StandardCodec extends Codec { success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - postings, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postings, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -134,7 +134,7 @@ public class StandardCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, String id, Set files) throws IOException { StandardPostingsReader.files(dir, segmentInfo, id, files); - PrefixCodedTermsReader.files(dir, segmentInfo, id, files); + BlockTermsReader.files(dir, segmentInfo, id, files); VariableGapTermsIndexReader.files(dir, segmentInfo, id, files); } @@ -146,7 +146,7 @@ public class StandardCodec extends Codec { public static void getStandardExtensions(Set extensions) { extensions.add(FREQ_EXTENSION); extensions.add(PROX_EXTENSION); - PrefixCodedTermsReader.getExtensions(extensions); + BlockTermsReader.getExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java index eed6b0e6735..0c9dd4f5c86 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java @@ -20,15 +20,18 @@ package org.apache.lucene.index.codecs.standard; import java.io.IOException; import java.util.Collection; -import org.apache.lucene.store.Directory; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -45,9 +48,12 @@ public class StandardPostingsReader extends PostingsReaderBase { int skipInterval; int maxSkipLevels; + //private String segment; + public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, String codecId) throws IOException { freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, 
StandardCodec.FREQ_EXTENSION), readBufferSize); + //this.segment = segmentInfo.name; if (segmentInfo.getHasProx()) { boolean success = false; try { @@ -83,33 +89,46 @@ public class StandardPostingsReader extends PostingsReaderBase { } // Must keep final because we do non-standard clone - private final static class DocTermState extends TermState { + private final static class StandardTermState extends BlockTermState { long freqOffset; long proxOffset; int skipOffset; + // Only used by the "primary" TermState -- clones don't + // copy this (basically they are "transient"): + ByteArrayDataInput bytesReader; + byte[] bytes; + + @Override public Object clone() { - DocTermState other = new DocTermState(); - other.copy(this); + StandardTermState other = new StandardTermState(); + other.copyFrom(this); return other; } - public void copy(TermState _other) { - super.copy(_other); - DocTermState other = (DocTermState) _other; + @Override + public void copyFrom(TermState _other) { + super.copyFrom(_other); + StandardTermState other = (StandardTermState) _other; freqOffset = other.freqOffset; proxOffset = other.proxOffset; skipOffset = other.skipOffset; + + // Do not copy bytes, bytesReader (else TermState is + // very heavy, ie drags around the entire block's + // byte[]). On seek back, if next() is in fact used + // (rare!), they will be re-read from disk. } + @Override public String toString() { return super.toString() + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " skipOffset=" + skipOffset; } } @Override - public TermState newTermState() { - return new DocTermState(); + public BlockTermState newTermState() { + return new StandardTermState(); } @Override @@ -125,35 +144,61 @@ public class StandardPostingsReader extends PostingsReaderBase { } } + /* Reads but does not decode the byte[] blob holding + metadata for the current terms block */ @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) - throws IOException { + public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException { + final StandardTermState termState = (StandardTermState) _termState; - final DocTermState docTermState = (DocTermState) termState; - - if (isIndexTerm) { - docTermState.freqOffset = termsIn.readVLong(); - } else { - docTermState.freqOffset += termsIn.readVLong(); + final int len = termsIn.readVInt(); + //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer()); + if (termState.bytes == null) { + termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; + termState.bytesReader = new ByteArrayDataInput(null); + } else if (termState.bytes.length < len) { + termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; } - if (docTermState.docFreq >= skipInterval) { - docTermState.skipOffset = termsIn.readVInt(); + termsIn.readBytes(termState.bytes, 0, len); + termState.bytesReader.reset(termState.bytes, 0, len); + } + + @Override + public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) + throws IOException { + final StandardTermState termState = (StandardTermState) _termState; + //System.out.println("StandardR.nextTerm seg=" + segment); + final boolean isFirstTerm = termState.termCount == 0; + + if (isFirstTerm) { + termState.freqOffset = termState.bytesReader.readVLong(); } else { - docTermState.skipOffset = 0; + termState.freqOffset += termState.bytesReader.readVLong(); + } + //System.out.println(" dF=" + termState.docFreq); + //System.out.println(" freqFP=" + 
termState.freqOffset); + assert termState.freqOffset < freqIn.length(); + + if (termState.docFreq >= skipInterval) { + termState.skipOffset = termState.bytesReader.readVInt(); + //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length()); + assert termState.freqOffset + termState.skipOffset < freqIn.length(); + } else { + // undefined } if (!fieldInfo.omitTermFreqAndPositions) { - if (isIndexTerm) { - docTermState.proxOffset = termsIn.readVLong(); + if (isFirstTerm) { + termState.proxOffset = termState.bytesReader.readVLong(); } else { - docTermState.proxOffset += termsIn.readVLong(); + termState.proxOffset += termState.bytesReader.readVLong(); } + //System.out.println(" proxFP=" + termState.proxOffset); } } @Override - public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { SegmentDocsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { docsEnum = new SegmentDocsEnum(freqIn); @@ -166,11 +211,11 @@ public class StandardPostingsReader extends PostingsReaderBase { docsEnum = new SegmentDocsEnum(freqIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { return null; } @@ -189,7 +234,7 @@ public class StandardPostingsReader extends PostingsReaderBase { docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } else { SegmentDocsAndPositionsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) { @@ -203,7 +248,7 @@ public class StandardPostingsReader extends PostingsReaderBase { docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } } @@ -233,7 +278,7 @@ public class StandardPostingsReader extends PostingsReaderBase { this.freqIn = (IndexInput) freqIn.clone(); } - public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { omitTF = fieldInfo.omitTermFreqAndPositions; if (omitTF) { freq = 1; @@ -248,8 +293,10 @@ public class StandardPostingsReader extends PostingsReaderBase { // cases freqIn.seek(termState.freqOffset); limit = termState.docFreq; + assert limit > 0; ord = 0; doc = 0; + //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset); skipped = false; @@ -331,13 +378,10 @@ public class StandardPostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - // TODO: jump right to next() if target is < X away - // from where we are now? 
- - if (skipOffset > 0) { + if ((target - skipInterval) >= doc && limit >= skipInterval) { // There are enough docs in the posting to have - // skip data + // skip data, and it isn't too close. if (skipper == null) { // This is the first time this enum has ever been used for skipping -- do lazy init @@ -407,7 +451,7 @@ public class StandardPostingsReader extends PostingsReaderBase { this.proxIn = (IndexInput) proxIn.clone(); } - public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert !fieldInfo.storePayloads; @@ -420,6 +464,8 @@ public class StandardPostingsReader extends PostingsReaderBase { lazyProxPointer = termState.proxOffset; limit = termState.docFreq; + assert limit > 0; + ord = 0; doc = 0; position = 0; @@ -430,6 +476,7 @@ public class StandardPostingsReader extends PostingsReaderBase { freqOffset = termState.freqOffset; proxOffset = termState.proxOffset; skipOffset = termState.skipOffset; + //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset); return this; } @@ -438,6 +485,7 @@ public class StandardPostingsReader extends PostingsReaderBase { public int nextDoc() throws IOException { while(true) { if (ord == limit) { + //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END"); return doc = NO_MORE_DOCS; } @@ -461,6 +509,7 @@ public class StandardPostingsReader extends PostingsReaderBase { position = 0; + //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc); return doc; } @@ -477,13 +526,12 @@ public class StandardPostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - // TODO: jump right to next() if target is < X away - // from where we are now? + //System.out.println("StandardR.D&PE advance target=" + target); - if (skipOffset > 0) { + if ((target - skipInterval) >= doc && limit >= skipInterval) { // There are enough docs in the posting to have - // skip data + // skip data, and it isn't too close if (skipper == null) { // This is the first time this enum has ever been used for skipping -- do lazy init @@ -524,6 +572,7 @@ public class StandardPostingsReader extends PostingsReaderBase { return doc; } + @Override public int nextPosition() throws IOException { if (lazyProxPointer != -1) { @@ -552,10 +601,12 @@ public class StandardPostingsReader extends PostingsReaderBase { /** Returns the payload at this position, or null if no * payload was indexed. 
*/ + @Override public BytesRef getPayload() throws IOException { throw new IOException("No payloads exist for this field!"); } + @Override public boolean hasPayload() { return false; } @@ -594,7 +645,7 @@ public class StandardPostingsReader extends PostingsReaderBase { this.proxIn = (IndexInput) proxIn.clone(); } - public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.storePayloads; if (payload == null) { @@ -622,6 +673,7 @@ public class StandardPostingsReader extends PostingsReaderBase { freqOffset = termState.freqOffset; proxOffset = termState.proxOffset; skipOffset = termState.skipOffset; + //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " this=" + this); return this; } @@ -630,6 +682,7 @@ public class StandardPostingsReader extends PostingsReaderBase { public int nextDoc() throws IOException { while(true) { if (ord == limit) { + //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END"); return doc = NO_MORE_DOCS; } @@ -653,6 +706,7 @@ public class StandardPostingsReader extends PostingsReaderBase { position = 0; + //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc); return doc; } @@ -669,13 +723,12 @@ public class StandardPostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - // TODO: jump right to next() if target is < X away - // from where we are now? + //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this); - if (skipOffset > 0) { + if ((target - skipInterval) >= doc && limit >= skipInterval) { // There are enough docs in the posting to have - // skip data + // skip data, and it isn't too close if (skipper == null) { // This is the first time this enum has ever been used for skipping -- do lazy init @@ -687,7 +740,7 @@ public class StandardPostingsReader extends PostingsReaderBase { // This is the first time this posting has // skipped, since reset() was called, so now we // load the skip data for this posting - + //System.out.println(" init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length()); skipper.init(freqOffset+skipOffset, freqOffset, proxOffset, limit, true); @@ -718,6 +771,7 @@ public class StandardPostingsReader extends PostingsReaderBase { return doc; } + @Override public int nextPosition() throws IOException { if (lazyProxPointer != -1) { @@ -748,6 +802,7 @@ public class StandardPostingsReader extends PostingsReaderBase { posPendingCount--; position = 0; payloadPending = false; + //System.out.println("StandardR.D&PE skipPos"); } // read next position @@ -771,11 +826,13 @@ public class StandardPostingsReader extends PostingsReaderBase { assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount; + //System.out.println("StandardR.D&PE nextPos return pos=" + position); return position; } /** Returns the payload at this position, or null if no * payload was indexed. 
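All three enums above replace the old "if (skipOffset > 0)" test in advance() with "(target - skipInterval) >= doc && limit >= skipInterval": the multi-level skip list is consulted only when the target is at least one skip interval ahead of the current doc and the posting list is long enough to carry skip data at all; otherwise the call falls through to plain nextDoc() scanning, which is what the removed TODO comment had asked for. A minimal sketch of just that decision follows; the skipInterval value is an assumption here, the real reader takes it from the postings header, and the class and method names are illustrative only.

// SkipDecisionSketch.java : illustrative decision logic only, not Lucene's enum code.
public class SkipDecisionSketch {

  static final int SKIP_INTERVAL = 16;   // assumed value; read from the postings header in the real code

  /** Decide whether advance(target) should consult the multi-level skip list. */
  static boolean useSkipData(int target, int currentDoc, int docFreq) {
    // Only worth it when the target is at least one skip interval ahead of where
    // we are now, and the posting list is long enough to have skip data at all.
    return (target - SKIP_INTERVAL) >= currentDoc && docFreq >= SKIP_INTERVAL;
  }

  public static void main(String[] args) {
    System.out.println(useSkipData(5000, 10, 100000)); // true: far jump, long posting list
    System.out.println(useSkipData(12, 10, 100000));   // false: target too close, linear scan is cheaper
    System.out.println(useSkipData(5000, 10, 8));      // false: too few docs to have skip data
  }
}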
*/ + @Override public BytesRef getPayload() throws IOException { assert lazyProxPointer == -1; assert posPendingCount < freq; @@ -785,6 +842,7 @@ public class StandardPostingsReader extends PostingsReaderBase { if (payloadLength > payload.bytes.length) { payload.grow(payloadLength); } + proxIn.readBytes(payload.bytes, 0, payloadLength); payload.length = payloadLength; payloadPending = false; @@ -792,6 +850,7 @@ public class StandardPostingsReader extends PostingsReaderBase { return payload; } + @Override public boolean hasPayload() { return payloadPending && payloadLength > 0; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java index 6dafdcda728..22e923f2273 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java @@ -22,12 +22,14 @@ package org.apache.lucene.index.codecs.standard; import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.TermStats; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -58,8 +60,15 @@ public final class StandardPostingsWriter extends PostingsWriterBase { int lastPayloadLength; int lastPosition; + private int pendingCount; + + //private String segment; + + private RAMOutputStream bytesWriter = new RAMOutputStream(); + public StandardPostingsWriter(SegmentWriteState state) throws IOException { super(); + //this.segment = state.segmentName; String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION); freqOut = state.directory.createOutput(fileName); @@ -95,6 +104,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase { @Override public void startTerm() { + //System.out.println("StandardW: startTerm seg=" + segment + " pendingCount=" + pendingCount); freqStart = freqOut.getFilePointer(); if (proxOut != null) { proxStart = proxOut.getFilePointer(); @@ -108,9 +118,12 @@ public final class StandardPostingsWriter extends PostingsWriterBase { // our parent calls setField whenever the field changes @Override public void setField(FieldInfo fieldInfo) { + //System.out.println("SPW: setField"); this.fieldInfo = fieldInfo; omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; storePayloads = fieldInfo.storePayloads; + //System.out.println(" set init blockFreqStart=" + freqStart); + //System.out.println(" set init blockProxStart=" + proxStart); } int lastDocID; @@ -120,6 +133,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase { * then we just skip consuming positions/payloads. 
*/ @Override public void startDoc(int docID, int termDocFreq) throws IOException { + //System.out.println("StandardW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq); final int delta = docID - lastDocID; @@ -150,6 +164,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase { /** Add a new position & payload */ @Override public void addPosition(int position, BytesRef payload) throws IOException { + //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer()); assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true"; assert proxOut != null; @@ -184,40 +199,51 @@ public final class StandardPostingsWriter extends PostingsWriterBase { /** Called when we are done adding docs to this term */ @Override - public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { - assert docCount > 0; + public void finishTerm(TermStats stats) throws IOException { + //System.out.println("StandardW.finishTerm seg=" + segment); + assert stats.docFreq > 0; // TODO: wasteful we are counting this (counting # docs // for this term) in two places? - assert docCount == df; + assert stats.docFreq == df; - if (isIndexTerm) { - // Write absolute at seek points - termsOut.writeVLong(freqStart); + final boolean isFirstTerm = pendingCount == 0; + //System.out.println(" isFirstTerm=" + isFirstTerm); + + //System.out.println(" freqFP=" + freqStart); + if (isFirstTerm) { + bytesWriter.writeVLong(freqStart); } else { - // Write delta between seek points - termsOut.writeVLong(freqStart - lastFreqStart); + bytesWriter.writeVLong(freqStart-lastFreqStart); } - lastFreqStart = freqStart; if (df >= skipInterval) { - termsOut.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart)); + bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart)); } - + if (!omitTermFreqAndPositions) { - if (isIndexTerm) { - // Write absolute at seek points - termsOut.writeVLong(proxStart); + //System.out.println(" proxFP=" + proxStart); + if (isFirstTerm) { + bytesWriter.writeVLong(proxStart); } else { - // Write delta between seek points - termsOut.writeVLong(proxStart - lastProxStart); + bytesWriter.writeVLong(proxStart - lastProxStart); } lastProxStart = proxStart; } - + lastDocID = 0; df = 0; + pendingCount++; + } + + @Override + public void flushTermsBlock() throws IOException { + //System.out.println("SPW.flushBlock pendingCount=" + pendingCount); + termsOut.writeVInt((int) bytesWriter.getFilePointer()); + bytesWriter.writeTo(termsOut); + bytesWriter.reset(); + pendingCount = 0; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/package.html b/lucene/src/java/org/apache/lucene/index/codecs/standard/package.html new file mode 100644 index 00000000000..aca1dc4b665 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/package.html @@ -0,0 +1,25 @@ + + + + + + + +Standard Codec + + diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java index 078e2adfb63..41ad00987ed 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java @@ -1,1150 +1,1150 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
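The finishTerm()/flushTermsBlock() changes above are the writer half of the readTermsBlock()/nextTerm() pair earlier in this patch: per-term pointers are no longer written straight to termsOut with the old isIndexTerm absolute/delta distinction, but buffered per block (absolute for the first term of the block, a delta for every later term) and flushed as one length-prefixed blob that the reader pulls in with a single bulk read and decodes lazily, term by term. A standalone round-trip sketch of that layout follows; the hand-rolled VLong codec, the byte[] buffers and all names are illustrative stand-ins for Lucene's RAMOutputStream/ByteArrayDataInput, not its API.

// BlockTermsMetaSketch.java : round trip of the "one metadata blob per terms block" layout.
// The writer half mirrors finishTerm()/flushTermsBlock(), the reader half mirrors
// readTermsBlock()/nextTerm().
import java.io.ByteArrayOutputStream;

public class BlockTermsMetaSketch {

  // Lucene-style variable-length long: 7 data bits per byte, high bit set = more bytes follow.
  static void writeVLong(ByteArrayOutputStream out, long v) {
    while ((v & ~0x7FL) != 0) {
      out.write((int) ((v & 0x7FL) | 0x80L));
      v >>>= 7;
    }
    out.write((int) v);
  }

  static long readVLong(byte[] buf, int[] pos) {
    long v = 0;
    int shift = 0;
    while (true) {
      byte b = buf[pos[0]++];
      v |= (long) (b & 0x7F) << shift;
      if ((b & 0x80) == 0) {
        return v;
      }
      shift += 7;
    }
  }

  public static void main(String[] args) {
    // Hypothetical freq-file pointers for the terms of one block.
    long[] freqOffsets = {1000L, 1250L, 1300L, 2048L};

    // Writer side (finishTerm-style): absolute pointer for the first term of the
    // block, a delta for every later term, all buffered in memory.
    ByteArrayOutputStream bytesWriter = new ByteArrayOutputStream();
    for (int i = 0; i < freqOffsets.length; i++) {
      writeVLong(bytesWriter, i == 0 ? freqOffsets[i] : freqOffsets[i] - freqOffsets[i - 1]);
    }

    // flushTermsBlock-style: the terms dict gets one length prefix plus the raw blob.
    ByteArrayOutputStream termsOut = new ByteArrayOutputStream();
    byte[] blob = bytesWriter.toByteArray();
    writeVLong(termsOut, blob.length);          // the real code writes a VInt length
    termsOut.write(blob, 0, blob.length);

    // Reader side: readTermsBlock-style bulk read of the blob ...
    byte[] block = termsOut.toByteArray();
    int[] pos = {0};
    int len = (int) readVLong(block, pos);
    System.out.println("block blob length = " + len);

    // ... then nextTerm-style lazy, per-term decoding of the deltas.
    long freqOffset = 0;
    for (int term = 0; term < freqOffsets.length; term++) {
      boolean isFirstTerm = term == 0;
      long delta = readVLong(block, pos);
      freqOffset = isFirstTerm ? delta : freqOffset + delta;
      System.out.println("term " + term + " freqOffset=" + freqOffset);
    }
  }
}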
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.queryParser; - -import java.io.IOException; -import java.io.StringReader; -import java.text.Collator; -import java.text.DateFormat; -import java.util.*; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryParser.QueryParser.Operator; -import org.apache.lucene.search.*; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.Version; - -/** This class is overridden by QueryParser in QueryParser.jj - * and acts to separate the majority of the Java code from the .jj grammar file. - */ -public abstract class QueryParserBase { - - /** Do not catch this exception in your code, it means you are using methods that you should no longer use. */ - public static class MethodRemovedUseAnother extends Throwable {} - - static final int CONJ_NONE = 0; - static final int CONJ_AND = 1; - static final int CONJ_OR = 2; - - static final int MOD_NONE = 0; - static final int MOD_NOT = 10; - static final int MOD_REQ = 11; - - // make it possible to call setDefaultOperator() without accessing - // the nested class: - /** Alternative form of QueryParser.Operator.AND */ - public static final Operator AND_OPERATOR = Operator.AND; - /** Alternative form of QueryParser.Operator.OR */ - public static final Operator OR_OPERATOR = Operator.OR; - - /** The actual operator that parser uses to combine query terms */ - Operator operator = OR_OPERATOR; - - boolean lowercaseExpandedTerms = true; - MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - boolean allowLeadingWildcard = false; - boolean enablePositionIncrements = true; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; - int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; - Locale locale = Locale.getDefault(); - - // the default date resolution - DateTools.Resolution dateResolution = null; - // maps field names to date resolutions - Map fieldToDateResolution = null; - - // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys. - Collator rangeCollator = null; - - boolean autoGeneratePhraseQueries; - - // So the generated QueryParser(CharStream) won't error out - protected QueryParserBase() { - } - - /** Initializes a query parser. Called by the QueryParser constructor - * @param matchVersion Lucene version to match. See above. - * @param f the default field for query terms. 
- * @param a used to find terms in the query text. - */ - public void init(Version matchVersion, String f, Analyzer a) { - analyzer = a; - field = f; - if (matchVersion.onOrAfter(Version.LUCENE_31)) { - setAutoGeneratePhraseQueries(false); - } else { - setAutoGeneratePhraseQueries(true); - } - } - - // the generated parser will create these in QueryParser - public abstract void ReInit(CharStream stream); - public abstract Query TopLevelQuery(String field) throws ParseException; - - - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String query) throws ParseException { - ReInit(new FastCharStream(new StringReader(query))); - try { - // TopLevelQuery is a Query followed by the end-of-input (EOF) - Query res = TopLevelQuery(field); - return res!=null ? res : newBooleanQuery(false); - } - catch (ParseException tme) { - // rethrow to include the original query: - ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); - e.initCause(tme); - throw e; - } - catch (TokenMgrError tme) { - ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); - e.initCause(tme); - throw e; - } - catch (BooleanQuery.TooManyClauses tmc) { - ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses"); - e.initCause(tmc); - throw e; - } - } - - - /** - * @return Returns the analyzer. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - - /** - * @return Returns the default field. - */ - public String getField() { - return field; - } - - /** - * @see #setAutoGeneratePhraseQueries(boolean) - */ - public final boolean getAutoGeneratePhraseQueries() { - return autoGeneratePhraseQueries; - } - - /** - * Set to true if phrase queries will be automatically generated - * when the analyzer returns more than one term from whitespace - * delimited text. - * NOTE: this behavior may not be suitable for all languages. - *

      - * Set to false if phrase queries should only be generated when - * surrounded by double quotes. - */ - public final void setAutoGeneratePhraseQueries(boolean value) { - this.autoGeneratePhraseQueries = value; - } - - /** - * Get the minimal similarity for fuzzy queries. - */ - public float getFuzzyMinSim() { - return fuzzyMinSim; - } - - /** - * Set the minimum similarity for fuzzy queries. - * Default is 2f. - */ - public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; - } - - /** - * Get the prefix length for fuzzy queries. - * @return Returns the fuzzyPrefixLength. - */ - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; - } - - /** - * Set the prefix length for fuzzy queries. Default is 0. - * @param fuzzyPrefixLength The fuzzyPrefixLength to set. - */ - public void setFuzzyPrefixLength(int fuzzyPrefixLength) { - this.fuzzyPrefixLength = fuzzyPrefixLength; - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - - /** - * Set to true to allow leading wildcard characters. - *

      - * When set, * or ? are allowed as - * the first character of a PrefixQuery and WildcardQuery. - * Note that this can produce very slow - * queries on big indexes. - *

      - * Default: false. - */ - public void setAllowLeadingWildcard(boolean allowLeadingWildcard) { - this.allowLeadingWildcard = allowLeadingWildcard; - } - - /** - * @see #setAllowLeadingWildcard(boolean) - */ - public boolean getAllowLeadingWildcard() { - return allowLeadingWildcard; - } - - /** - * Set to true to enable position increments in result query. - *

      - * When set, result phrase and multi-phrase queries will - * be aware of position increments. - * Useful when e.g. a StopFilter increases the position increment of - * the token that follows an omitted token. - *

      - * Default: true. - */ - public void setEnablePositionIncrements(boolean enable) { - this.enablePositionIncrements = enable; - } - - /** - * @see #setEnablePositionIncrements(boolean) - */ - public boolean getEnablePositionIncrements() { - return enablePositionIncrements; - } - - /** - * Sets the boolean operator of the QueryParser. - * In default mode (OR_OPERATOR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
      - * In AND_OPERATOR mode terms are considered to be in conjunction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setDefaultOperator(Operator op) { - this.operator = op; - } - - - /** - * Gets implicit operator setting, which will be either AND_OPERATOR - * or OR_OPERATOR. - */ - public Operator getDefaultOperator() { - return operator; - } - - - /** - * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically - * lower-cased or not. Default is true. - */ - public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; - } - - - /** - * @see #setLowercaseExpandedTerms(boolean) - */ - public boolean getLowercaseExpandedTerms() { - return lowercaseExpandedTerms; - } - - /** - * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} - * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the - * old-fashioned BooleanQuery expansion rewriting and the above - * points are not relevant then use this to change - * the rewrite method. - */ - public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { - multiTermRewriteMethod = method; - } - - - /** - * @see #setMultiTermRewriteMethod - */ - public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { - return multiTermRewriteMethod; - } - - /** - * Set locale used by date range parsing. - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * Returns current locale, allowing access by subclasses. - */ - public Locale getLocale() { - return locale; - } - - /** - * Sets the default date resolution used by RangeQueries for fields for which no - * specific date resolutions has been set. Field specific resolutions can be set - * with {@link #setDateResolution(String, org.apache.lucene.document.DateTools.Resolution)}. - * - * @param dateResolution the default date resolution to set - */ - public void setDateResolution(DateTools.Resolution dateResolution) { - this.dateResolution = dateResolution; - } - - /** - * Sets the date resolution used by RangeQueries for a specific field. - * - * @param fieldName field for which the date resolution is to be set - * @param dateResolution date resolution to set - */ - public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { - if (fieldName == null) { - throw new IllegalArgumentException("Field cannot be null."); - } - - if (fieldToDateResolution == null) { - // lazily initialize HashMap - fieldToDateResolution = new HashMap(); - } - - fieldToDateResolution.put(fieldName, dateResolution); - } - - /** - * Returns the date resolution that is used by RangeQueries for the given field. - * Returns null, if no default or field specific date resolution has been set - * for the given field. 
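The setDateResolution()/getDateResolution() pair documented above (the getter itself follows just below) keeps a lazily created per-field map and falls back to the parser-wide default whenever a field has no specific resolution. A minimal sketch of just that lookup, with a stand-in enum in place of DateTools.Resolution; every name here is illustrative rather than the parser's own.

// DateResolutionLookupSketch.java : illustrative fallback logic only.
import java.util.HashMap;
import java.util.Map;

public class DateResolutionLookupSketch {

  enum Resolution { YEAR, MONTH, DAY, MILLISECOND }   // stand-in for DateTools.Resolution

  private Resolution defaultResolution;                // parser-wide default (may stay null)
  private Map<String, Resolution> fieldToResolution;   // lazily created per-field overrides

  void setDateResolution(Resolution r) { defaultResolution = r; }

  void setDateResolution(String field, Resolution r) {
    if (field == null) throw new IllegalArgumentException("Field cannot be null.");
    if (fieldToResolution == null) fieldToResolution = new HashMap<String, Resolution>();
    fieldToResolution.put(field, r);
  }

  Resolution getDateResolution(String field) {
    if (field == null) throw new IllegalArgumentException("Field cannot be null.");
    if (fieldToResolution == null) return defaultResolution;   // no per-field overrides at all
    Resolution r = fieldToResolution.get(field);
    return r != null ? r : defaultResolution;                  // fall back to the default
  }

  public static void main(String[] args) {
    DateResolutionLookupSketch p = new DateResolutionLookupSketch();
    p.setDateResolution(Resolution.DAY);
    p.setDateResolution("timestamp", Resolution.MILLISECOND);
    System.out.println(p.getDateResolution("timestamp")); // MILLISECOND (per-field override)
    System.out.println(p.getDateResolution("title"));     // DAY (parser-wide default)
  }
}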
- * - */ - public DateTools.Resolution getDateResolution(String fieldName) { - if (fieldName == null) { - throw new IllegalArgumentException("Field cannot be null."); - } - - if (fieldToDateResolution == null) { - // no field specific date resolutions set; return default date resolution instead - return this.dateResolution; - } - - DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); - if (resolution == null) { - // no date resolutions set for the given field; return default date resolution instead - resolution = this.dateResolution; - } - - return resolution; - } - - /** - * Sets the collator used to determine index term inclusion in ranges - * for RangeQuerys. - *

      - * WARNING: Setting the rangeCollator to a non-null - * collator using this method will cause every single index Term in the - * Field referenced by lowerTerm and/or upperTerm to be examined. - * Depending on the number of index Terms in this Field, the operation could - * be very slow. - * - * @param rc the collator to use when constructing RangeQuerys - */ - public void setRangeCollator(Collator rc) { - rangeCollator = rc; - } - - /** - * @return the collator used to determine index term inclusion in ranges - * for RangeQuerys. - */ - public Collator getRangeCollator() { - return rangeCollator; - } - - protected void addClause(List clauses, int conj, int mods, Query q) { - boolean required, prohibited; - - // If this term is introduced by AND, make the preceding term required, - // unless it's already prohibited - if (clauses.size() > 0 && conj == CONJ_AND) { - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.MUST); - } - - if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { - // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.SHOULD); - } - - // We might have been passed a null query; the term might have been - // filtered away by the analyzer. - if (q == null) - return; - - if (operator == OR_OPERATOR) { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. - prohibited = (mods == MOD_NOT); - required = (mods == MOD_REQ); - if (conj == CONJ_AND && !prohibited) { - required = true; - } - } else { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED - // if not PROHIBITED and not introduced by OR - prohibited = (mods == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - if (required && !prohibited) - clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST)); - else if (!required && !prohibited) - clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD)); - else if (!required && prohibited) - clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT)); - else - throw new RuntimeException("Clause cannot be both required and prohibited"); - } - - /** - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, - // PhraseQuery, or nothing based on the term count - - TokenStream source; - try { - source = analyzer.reusableTokenStream(field, new StringReader(queryText)); - source.reset(); - } catch (IOException e) { - source = analyzer.tokenStream(field, new StringReader(queryText)); - } - CachingTokenFilter buffer = new CachingTokenFilter(source); - TermToBytesRefAttribute termAtt = null; - PositionIncrementAttribute posIncrAtt = null; - int numTokens = 0; - - boolean success = false; - try { - buffer.reset(); - success = true; - } catch (IOException e) { - // success==false if we hit an exception - } - if (success) { - if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { - termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); - } - if 
(buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - } - - int positionCount = 0; - boolean severalTokensAtSamePosition = false; - - boolean hasMoreTokens = false; - if (termAtt != null) { - try { - hasMoreTokens = buffer.incrementToken(); - while (hasMoreTokens) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } - hasMoreTokens = buffer.incrementToken(); - } - } catch (IOException e) { - // ignore - } - } - try { - // rewind the buffer stream - buffer.reset(); - - // close original stream - all tokens buffered - source.close(); - } - catch (IOException e) { - // ignore - } - - if (numTokens == 0) - return null; - else if (numTokens == 1) { - BytesRef term = new BytesRef(); - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - termAtt.toBytesRef(term); - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - return newTermQuery(new Term(field, term)); - } else { - if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) { - if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) { - // no phrase query: - BooleanQuery q = newBooleanQuery(positionCount == 1); - - BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? - BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; - - for (int i = 0; i < numTokens; i++) { - BytesRef term = new BytesRef(); - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - termAtt.toBytesRef(term); - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - Query currentQuery = newTermQuery( - new Term(field, term)); - q.add(currentQuery, occur); - } - return q; - } - else { - // phrase query: - MultiPhraseQuery mpq = newMultiPhraseQuery(); - mpq.setSlop(phraseSlop); - List multiTerms = new ArrayList(); - int position = -1; - for (int i = 0; i < numTokens; i++) { - BytesRef term = new BytesRef(); - int positionIncrement = 1; - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - termAtt.toBytesRef(term); - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpq.add(multiTerms.toArray(new Term[0]),position); - } else { - mpq.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); - } - position += positionIncrement; - multiTerms.add(new Term(field, term)); - } - if (enablePositionIncrements) { - mpq.add(multiTerms.toArray(new Term[0]),position); - } else { - mpq.add(multiTerms.toArray(new Term[0])); - } - return mpq; - } - } - else { - PhraseQuery pq = newPhraseQuery(); - pq.setSlop(phraseSlop); - int position = -1; - - - for (int i = 0; i < numTokens; i++) { - BytesRef term = new BytesRef(); - int positionIncrement = 1; - - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - termAtt.toBytesRef(term); - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - if (enablePositionIncrements) { - position += positionIncrement; - pq.add(new 
Term(field, term),position); - } else { - pq.add(new Term(field, term)); - } - } - return pq; - } - } - } - - - - /** - * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. - * This method may be overridden, for example, to return - * a SpanNearQuery instead of a PhraseQuery. - * - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, int slop) - throws ParseException { - Query query = getFieldQuery(field, queryText, true); - - if (query instanceof PhraseQuery) { - ((PhraseQuery) query).setSlop(slop); - } - if (query instanceof MultiPhraseQuery) { - ((MultiPhraseQuery) query).setSlop(slop); - } - - return query; - } - - /** - * - * @exception org.apache.lucene.queryParser.ParseException - */ - protected Query getRangeQuery(String field, - String part1, - String part2, - boolean startInclusive, - boolean endInclusive) throws ParseException - { - if (lowercaseExpandedTerms) { - part1 = part1==null ? null : part1.toLowerCase(); - part2 = part2==null ? null : part2.toLowerCase(); - } - - - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); - df.setLenient(true); - DateTools.Resolution resolution = getDateResolution(field); - - try { - part1 = DateTools.dateToString(df.parse(part1), resolution); - } catch (Exception e) { } - - try { - Date d2 = df.parse(part2); - if (endInclusive) { - // The user can only specify the date, not the time, so make sure - // the time is set to the latest possible time of that date to really - // include all documents: - Calendar cal = Calendar.getInstance(locale); - cal.setTime(d2); - cal.set(Calendar.HOUR_OF_DAY, 23); - cal.set(Calendar.MINUTE, 59); - cal.set(Calendar.SECOND, 59); - cal.set(Calendar.MILLISECOND, 999); - d2 = cal.getTime(); - } - part2 = DateTools.dateToString(d2, resolution); - } catch (Exception e) { } - - return newRangeQuery(field, part1, part2, startInclusive, endInclusive); - } - - /** - * Builds a new BooleanQuery instance - * @param disableCoord disable coord - * @return new BooleanQuery instance - */ - protected BooleanQuery newBooleanQuery(boolean disableCoord) { - return new BooleanQuery(disableCoord); - } - - /** - * Builds a new BooleanClause instance - * @param q sub query - * @param occur how this clause should occur when matching documents - * @return new BooleanClause instance - */ - protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) { - return new BooleanClause(q, occur); - } - - /** - * Builds a new TermQuery instance - * @param term term - * @return new TermQuery instance - */ - protected Query newTermQuery(Term term){ - return new TermQuery(term); - } - - /** - * Builds a new PhraseQuery instance - * @return new PhraseQuery instance - */ - protected PhraseQuery newPhraseQuery(){ - return new PhraseQuery(); - } - - /** - * Builds a new MultiPhraseQuery instance - * @return new MultiPhraseQuery instance - */ - protected MultiPhraseQuery newMultiPhraseQuery(){ - return new MultiPhraseQuery(); - } - - /** - * Builds a new PrefixQuery instance - * @param prefix Prefix term - * @return new PrefixQuery instance - */ - protected Query newPrefixQuery(Term prefix){ - PrefixQuery query = new PrefixQuery(prefix); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Builds a new RegexpQuery instance - * @param regexp Regexp term - * @return new RegexpQuery instance - */ - protected Query newRegexpQuery(Term regexp) { - 
RegexpQuery query = new RegexpQuery(regexp); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Builds a new FuzzyQuery instance - * @param term Term - * @param minimumSimilarity minimum similarity - * @param prefixLength prefix length - * @return new FuzzyQuery Instance - */ - protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { - // FuzzyQuery doesn't yet allow constant score rewrite - return new FuzzyQuery(term,minimumSimilarity,prefixLength); - } - - /** - * Builds a new TermRangeQuery instance - * @param field Field - * @param part1 min - * @param part2 max - * @param startInclusive true if the start of the range is inclusive - * @param endInclusive true if the end of the range is inclusive - * @return new TermRangeQuery instance - */ - protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, rangeCollator); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Builds a new MatchAllDocsQuery instance - * @return new MatchAllDocsQuery instance - */ - protected Query newMatchAllDocsQuery() { - return new MatchAllDocsQuery(); - } - - /** - * Builds a new WildcardQuery instance - * @param t wildcard term - * @return new WildcardQuery instance - */ - protected Query newWildcardQuery(Term t) { - WildcardQuery query = new WildcardQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances - * to join. - * - * @return Resulting {@link org.apache.lucene.search.Query} object. - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses) throws ParseException { - return getBooleanQuery(clauses, false); - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances - * to join. - * @param disableCoord true if coord scoring should be disabled. - * - * @return Resulting {@link org.apache.lucene.search.Query} object. - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses, boolean disableCoord) - throws ParseException - { - if (clauses.size()==0) { - return null; // all clause words were filtered away by the analyzer. - } - BooleanQuery query = newBooleanQuery(disableCoord); - for(final BooleanClause clause: clauses) { - query.add(clause); - } - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains one or more wildcard - * characters (? and *), but is not a prefix term token (one - * that has just a single * character at the end) - *

      - * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

      - * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link org.apache.lucene.search.Query} built for the term - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if ("*".equals(field)) { - if ("*".equals(termStr)) return newMatchAllDocsQuery(); - } - if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) - throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return newWildcardQuery(t); - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains a regular expression - * query. - *

      - * Depending on settings, pattern term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with regular expression templates. - *

      - * Can be overridden by extending classes, to provide custom handling for - * regular expression queries, which may be necessary due to missing analyzer - * calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains a regular expression - * - * @return Resulting {@link org.apache.lucene.search.Query} built for the term - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getRegexpQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return newRegexpQuery(t); - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *

      - * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

      - * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * - * @return Resulting {@link org.apache.lucene.search.Query} built for the term - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - if (!allowLeadingWildcard && termStr.startsWith("*")) - throw new ParseException("'*' not allowed as first character in PrefixQuery"); - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return newPrefixQuery(t); - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting {@link org.apache.lucene.search.Query} built for the term - * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); - } - - - // extracted from the .jj grammar - Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean prefix, boolean wildcard, boolean fuzzy, boolean regexp) throws ParseException { - Query q; - - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(qfield, term.image); - } else if (prefix) { - q = getPrefixQuery(qfield, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (regexp) { - q = getRegexpQuery(qfield, term.image.substring(1, term.image.length()-1)); - } else if (fuzzy) { - float fms = fuzzyMinSim; - try { - fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); - } catch (Exception ignored) { } - if(fms < 0.0f){ - throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); - } else if (fms >= 1.0f && fms != (int) fms) { - throw new ParseException("Fractional edit distances are not allowed!"); - } - q = getFuzzyQuery(qfield, termImage, fms); - } else { - q = getFieldQuery(qfield, termImage, false); - } - return q; - } - - // extracted from the .jj grammar - Query handleQuotedTerm(String qfield, Token term, Token fuzzySlop) throws ParseException { - int s = phraseSlop; // default - if (fuzzySlop != null) { - try { - s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s); - } - - // extracted from the .jj grammar - Query handleBoost(Query q, Token boost) throws ParseException { - if (boost != null) { - float f = (float) 1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - } - catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ - } - - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q.setBoost(f); - } - } - return q; - } - - - - /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. - * - * Supports escaped unicode characters, e. g. translates - * \\u0041 to A. - * - */ - String discardEscapeChar(String input) throws ParseException { - // Create char array to hold unescaped char sequence - char[] output = new char[input.length()]; - - // The length of the output can be less than the input - // due to discarded escape chars. This variable holds - // the actual length of the output - int length = 0; - - // We remember whether the last processed character was - // an escape character - boolean lastCharWasEscapeChar = false; - - // The multiplier the current unicode digit must be multiplied with. - // E. g. the first digit must be multiplied with 16^3, the second with 16^2... - int codePointMultiplier = 0; - - // Used to calculate the codepoint of the escaped unicode character - int codePoint = 0; - - for (int i = 0; i < input.length(); i++) { - char curChar = input.charAt(i); - if (codePointMultiplier > 0) { - codePoint += hexToInt(curChar) * codePointMultiplier; - codePointMultiplier >>>= 4; - if (codePointMultiplier == 0) { - output[length++] = (char)codePoint; - codePoint = 0; - } - } else if (lastCharWasEscapeChar) { - if (curChar == 'u') { - // found an escaped unicode character - codePointMultiplier = 16 * 16 * 16; - } else { - // this character was escaped - output[length] = curChar; - length++; - } - lastCharWasEscapeChar = false; - } else { - if (curChar == '\\') { - lastCharWasEscapeChar = true; - } else { - output[length] = curChar; - length++; - } - } - } - - if (codePointMultiplier > 0) { - throw new ParseException("Truncated unicode escape sequence."); - } - - if (lastCharWasEscapeChar) { - throw new ParseException("Term can not end with escape character."); - } - - return new String(output, 0, length); - } - - /** Returns the numeric value of the hexadecimal character */ - static final int hexToInt(char c) throws ParseException { - if ('0' <= c && c <= '9') { - return c - '0'; - } else if ('a' <= c && c <= 'f'){ - return c - 'a' + 10; - } else if ('A' <= c && c <= 'F') { - return c - 'A' + 10; - } else { - throw new ParseException("None-hex character in unicode escape sequence: " + c); - } - } - - /** - * Returns a String where those characters that QueryParser - * expects to be escaped are escaped by a preceding \. - */ - public static String escape(String s) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - // These characters are part of the query syntax and must be escaped - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' - || c == '*' || c == '?' || c == '|' || c == '&') { - sb.append('\\'); - } - sb.append(c); - } - return sb.toString(); - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
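QueryParserBase.escape() above backslash-escapes every character that is query syntax, and discardEscapeChar() strips those backslashes again (and additionally decodes \uXXXX escapes). A small standalone round trip of the same idea follows, with the unicode-escape handling left out; the class and method names are illustrative, only the escaped character set is taken from the code above.

// EscapeRoundTripSketch.java : standalone illustration of the escape/unescape idea.
// The real methods live in QueryParserBase and also handle \uXXXX unicode escapes.
public class EscapeRoundTripSketch {

  // Characters that are query syntax and must be escaped (same set as escape() above).
  static final String SPECIALS = "\\+-!():^[]\"{}~*?|&";

  static String escape(String s) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (SPECIALS.indexOf(c) >= 0) {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb.toString();
  }

  static String unescape(String s) {
    StringBuilder sb = new StringBuilder();
    boolean lastWasEscape = false;
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (!lastWasEscape && c == '\\') {
        lastWasEscape = true;        // drop the escape char itself
      } else {
        sb.append(c);
        lastWasEscape = false;
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    String raw = "(1+1):2";
    String escaped = escape(raw);          // \(1\+1\)\:2
    System.out.println(escaped);
    System.out.println(unescape(escaped)); // (1+1):2
  }
}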
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.queryParser; + +import java.io.IOException; +import java.io.StringReader; +import java.text.Collator; +import java.text.DateFormat; +import java.util.*; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser.Operator; +import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +/** This class is overridden by QueryParser in QueryParser.jj + * and acts to separate the majority of the Java code from the .jj grammar file. + */ +public abstract class QueryParserBase { + + /** Do not catch this exception in your code, it means you are using methods that you should no longer use. */ + public static class MethodRemovedUseAnother extends Throwable {} + + static final int CONJ_NONE = 0; + static final int CONJ_AND = 1; + static final int CONJ_OR = 2; + + static final int MOD_NONE = 0; + static final int MOD_NOT = 10; + static final int MOD_REQ = 11; + + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; + + /** The actual operator that parser uses to combine query terms */ + Operator operator = OR_OPERATOR; + + boolean lowercaseExpandedTerms = true; + MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + boolean allowLeadingWildcard = false; + boolean enablePositionIncrements = true; + + Analyzer analyzer; + String field; + int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + Locale locale = Locale.getDefault(); + + // the default date resolution + DateTools.Resolution dateResolution = null; + // maps field names to date resolutions + Map fieldToDateResolution = null; + + // The collator to use when determining range inclusion, + // for use when constructing RangeQuerys. + Collator rangeCollator = null; + + boolean autoGeneratePhraseQueries; + + // So the generated QueryParser(CharStream) won't error out + protected QueryParserBase() { + } + + /** Initializes a query parser. Called by the QueryParser constructor + * @param matchVersion Lucene version to match. See above. + * @param f the default field for query terms. + * @param a used to find terms in the query text. 
+ */ + public void init(Version matchVersion, String f, Analyzer a) { + analyzer = a; + field = f; + if (matchVersion.onOrAfter(Version.LUCENE_31)) { + setAutoGeneratePhraseQueries(false); + } else { + setAutoGeneratePhraseQueries(true); + } + } + + // the generated parser will create these in QueryParser + public abstract void ReInit(CharStream stream); + public abstract Query TopLevelQuery(String field) throws ParseException; + + + /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @throws ParseException if the parsing fails + */ + public Query parse(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + Query res = TopLevelQuery(field); + return res!=null ? res : newBooleanQuery(false); + } + catch (ParseException tme) { + // rethrow to include the original query: + ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); + e.initCause(tme); + throw e; + } + catch (TokenMgrError tme) { + ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); + e.initCause(tme); + throw e; + } + catch (BooleanQuery.TooManyClauses tmc) { + ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses"); + e.initCause(tmc); + throw e; + } + } + + + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the default field. + */ + public String getField() { + return field; + } + + /** + * @see #setAutoGeneratePhraseQueries(boolean) + */ + public final boolean getAutoGeneratePhraseQueries() { + return autoGeneratePhraseQueries; + } + + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

      + * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + */ + public final void setAutoGeneratePhraseQueries(boolean value) { + this.autoGeneratePhraseQueries = value; + } + + /** + * Get the minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + + /** + * Set the minimum similarity for fuzzy queries. + * Default is 2f. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. + */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + /** + * Sets the default slop for phrases. If zero, then exact phrase matches + * are required. Default value is zero. + */ + public void setPhraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + /** + * Gets the default slop for phrases. + */ + public int getPhraseSlop() { + return phraseSlop; + } + + + /** + * Set to true to allow leading wildcard characters. + *

      + * When set, * or ? are allowed as + * the first character of a PrefixQuery and WildcardQuery. + * Note that this can produce very slow + * queries on big indexes. + *
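As a hedged illustration (parser as in the earlier sketch):

  // by default a leading wildcard is rejected with a ParseException
  parser.setAllowLeadingWildcard(true);
  Query q = parser.parse("*ber?in");   // now accepted and turned into a WildcardQuery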

      + * Default: false. + */ + public void setAllowLeadingWildcard(boolean allowLeadingWildcard) { + this.allowLeadingWildcard = allowLeadingWildcard; + } + + /** + * @see #setAllowLeadingWildcard(boolean) + */ + public boolean getAllowLeadingWildcard() { + return allowLeadingWildcard; + } + + /** + * Set to true to enable position increments in result query. + *

      + * When set, result phrase and multi-phrase queries will + * be aware of position increments. + * Useful when e.g. a StopFilter increases the position increment of + * the token that follows an omitted token. + *
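A small hedged illustration; whether a gap actually appears depends on the analyzer in use (a stop-filtering analyzer is assumed here):

  // "the old tower" analyzes to [old, tower] with a position gap where "the" was dropped
  parser.setEnablePositionIncrements(true);
  Query q = parser.parse("\"the old tower\"");
  // the resulting PhraseQuery keeps that gap, so matching respects the original token positions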

      + * Default: true. + */ + public void setEnablePositionIncrements(boolean enable) { + this.enablePositionIncrements = enable; + } + + /** + * @see #setEnablePositionIncrements(boolean) + */ + public boolean getEnablePositionIncrements() { + return enablePositionIncrements; + } + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (OR_OPERATOR) terms without any modifiers + * are considered optional: for example capital of Hungary is equal to + * capital OR of OR Hungary.
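A short sketch of both modes, reusing the parser from the earlier example (query text illustrative):

  Query q1 = parser.parse("capital of Hungary");           // default: capital OR of OR Hungary
  parser.setDefaultOperator(QueryParserBase.AND_OPERATOR);
  Query q2 = parser.parse("capital of Hungary");           // now: capital AND of AND Hungary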
      + * In AND_OPERATOR mode terms are considered to be in conjunction: the + * above mentioned query is parsed as capital AND of AND Hungary + */ + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { + return operator; + } + + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is true. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; + } + + /** + * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + * a) Runs faster b) Does not have the scarcity of terms unduly influence score + * c) avoids any "TooManyBooleanClauses" exception. + * However, if your application really needs to use the + * old-fashioned BooleanQuery expansion rewriting and the above + * points are not relevant then use this to change + * the rewrite method. + */ + public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { + multiTermRewriteMethod = method; + } + + + /** + * @see #setMultiTermRewriteMethod + */ + public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { + return multiTermRewriteMethod; + } + + /** + * Set locale used by date range parsing. + */ + public void setLocale(Locale locale) { + this.locale = locale; + } + + /** + * Returns current locale, allowing access by subclasses. + */ + public Locale getLocale() { + return locale; + } + + /** + * Sets the default date resolution used by RangeQueries for fields for which no + * specific date resolutions has been set. Field specific resolutions can be set + * with {@link #setDateResolution(String, org.apache.lucene.document.DateTools.Resolution)}. + * + * @param dateResolution the default date resolution to set + */ + public void setDateResolution(DateTools.Resolution dateResolution) { + this.dateResolution = dateResolution; + } + + /** + * Sets the date resolution used by RangeQueries for a specific field. + * + * @param fieldName field for which the date resolution is to be set + * @param dateResolution date resolution to set + */ + public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { + if (fieldName == null) { + throw new IllegalArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) { + // lazily initialize HashMap + fieldToDateResolution = new HashMap(); + } + + fieldToDateResolution.put(fieldName, dateResolution); + } + + /** + * Returns the date resolution that is used by RangeQueries for the given field. + * Returns null, if no default or field specific date resolution has been set + * for the given field. 
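A hedged configuration sketch for the two setters above ("released" is an illustrative field name):

  parser.setDateResolution(DateTools.Resolution.DAY);                     // default for all date ranges
  parser.setDateResolution("released", DateTools.Resolution.MILLISECOND); // finer resolution for one field
  // getDateResolution("released") now returns MILLISECOND; any other field falls back to DAY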
+ * + */ + public DateTools.Resolution getDateResolution(String fieldName) { + if (fieldName == null) { + throw new IllegalArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) { + // no field specific date resolutions set; return default date resolution instead + return this.dateResolution; + } + + DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); + if (resolution == null) { + // no date resolutions set for the given field; return default date resolution instead + resolution = this.dateResolution; + } + + return resolution; + } + + /** + * Sets the collator used to determine index term inclusion in ranges + * for RangeQuerys. + *

      + * WARNING: Setting the rangeCollator to a non-null + * collator using this method will cause every single index Term in the + * Field referenced by lowerTerm and/or upperTerm to be examined. + * Depending on the number of index Terms in this Field, the operation could + * be very slow. + * + * @param rc the collator to use when constructing RangeQuerys + */ + public void setRangeCollator(Collator rc) { + rangeCollator = rc; + } + + /** + * @return the collator used to determine index term inclusion in ranges + * for RangeQuerys. + */ + public Collator getRangeCollator() { + return rangeCollator; + } + + protected void addClause(List clauses, int conj, int mods, Query q) { + boolean required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.size() > 0 && conj == CONJ_AND) { + BooleanClause c = clauses.get(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); + } + + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = clauses.get(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + if (operator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. + prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); + } + + /** + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + TokenStream source; + try { + source = analyzer.reusableTokenStream(field, new StringReader(queryText)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(queryText)); + } + CachingTokenFilter buffer = new CachingTokenFilter(source); + TermToBytesRefAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; + int numTokens = 0; + + boolean success = false; + try { + buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { + termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); + } + if 
(buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + } + + int positionCount = 0; + boolean severalTokensAtSamePosition = false; + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + try { + // rewind the buffer stream + buffer.reset(); + + // close original stream - all tokens buffered + source.close(); + } + catch (IOException e) { + // ignore + } + + if (numTokens == 0) + return null; + else if (numTokens == 1) { + BytesRef term = new BytesRef(); + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + termAtt.toBytesRef(term); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + return newTermQuery(new Term(field, term)); + } else { + if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) { + if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) { + // no phrase query: + BooleanQuery q = newBooleanQuery(positionCount == 1); + + BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? + BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; + + for (int i = 0; i < numTokens; i++) { + BytesRef term = new BytesRef(); + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + termAtt.toBytesRef(term); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + Query currentQuery = newTermQuery( + new Term(field, term)); + q.add(currentQuery, occur); + } + return q; + } + else { + // phrase query: + MultiPhraseQuery mpq = newMultiPhraseQuery(); + mpq.setSlop(phraseSlop); + List multiTerms = new ArrayList(); + int position = -1; + for (int i = 0; i < numTokens; i++) { + BytesRef term = new BytesRef(); + int positionIncrement = 1; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + termAtt.toBytesRef(term); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[0]),position); + } else { + mpq.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, term)); + } + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[0]),position); + } else { + mpq.add(multiTerms.toArray(new Term[0])); + } + return mpq; + } + } + else { + PhraseQuery pq = newPhraseQuery(); + pq.setSlop(phraseSlop); + int position = -1; + + + for (int i = 0; i < numTokens; i++) { + BytesRef term = new BytesRef(); + int positionIncrement = 1; + + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + termAtt.toBytesRef(term); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (enablePositionIncrements) { + position += positionIncrement; + pq.add(new 
Term(field, term),position); + } else { + pq.add(new Term(field, term)); + } + } + return pq; + } + } + } + + + + /** + * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. + * This method may be overridden, for example, to return + * a SpanNearQuery instead of a PhraseQuery. + * + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = getFieldQuery(field, queryText, true); + + if (query instanceof PhraseQuery) { + ((PhraseQuery) query).setSlop(slop); + } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } + + return query; + } + + /** + * + * @exception org.apache.lucene.queryParser.ParseException + */ + protected Query getRangeQuery(String field, + String part1, + String part2, + boolean startInclusive, + boolean endInclusive) throws ParseException + { + if (lowercaseExpandedTerms) { + part1 = part1==null ? null : part1.toLowerCase(); + part2 = part2==null ? null : part2.toLowerCase(); + } + + + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); + df.setLenient(true); + DateTools.Resolution resolution = getDateResolution(field); + + try { + part1 = DateTools.dateToString(df.parse(part1), resolution); + } catch (Exception e) { } + + try { + Date d2 = df.parse(part2); + if (endInclusive) { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + Calendar cal = Calendar.getInstance(locale); + cal.setTime(d2); + cal.set(Calendar.HOUR_OF_DAY, 23); + cal.set(Calendar.MINUTE, 59); + cal.set(Calendar.SECOND, 59); + cal.set(Calendar.MILLISECOND, 999); + d2 = cal.getTime(); + } + part2 = DateTools.dateToString(d2, resolution); + } catch (Exception e) { } + + return newRangeQuery(field, part1, part2, startInclusive, endInclusive); + } + + /** + * Builds a new BooleanQuery instance + * @param disableCoord disable coord + * @return new BooleanQuery instance + */ + protected BooleanQuery newBooleanQuery(boolean disableCoord) { + return new BooleanQuery(disableCoord); + } + + /** + * Builds a new BooleanClause instance + * @param q sub query + * @param occur how this clause should occur when matching documents + * @return new BooleanClause instance + */ + protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) { + return new BooleanClause(q, occur); + } + + /** + * Builds a new TermQuery instance + * @param term term + * @return new TermQuery instance + */ + protected Query newTermQuery(Term term){ + return new TermQuery(term); + } + + /** + * Builds a new PhraseQuery instance + * @return new PhraseQuery instance + */ + protected PhraseQuery newPhraseQuery(){ + return new PhraseQuery(); + } + + /** + * Builds a new MultiPhraseQuery instance + * @return new MultiPhraseQuery instance + */ + protected MultiPhraseQuery newMultiPhraseQuery(){ + return new MultiPhraseQuery(); + } + + /** + * Builds a new PrefixQuery instance + * @param prefix Prefix term + * @return new PrefixQuery instance + */ + protected Query newPrefixQuery(Term prefix){ + PrefixQuery query = new PrefixQuery(prefix); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Builds a new RegexpQuery instance + * @param regexp Regexp term + * @return new RegexpQuery instance + */ + protected Query newRegexpQuery(Term regexp) { + 
RegexpQuery query = new RegexpQuery(regexp); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Builds a new FuzzyQuery instance + * @param term Term + * @param minimumSimilarity minimum similarity + * @param prefixLength prefix length + * @return new FuzzyQuery Instance + */ + protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { + // FuzzyQuery doesn't yet allow constant score rewrite + return new FuzzyQuery(term,minimumSimilarity,prefixLength); + } + + /** + * Builds a new TermRangeQuery instance + * @param field Field + * @param part1 min + * @param part2 max + * @param startInclusive true if the start of the range is inclusive + * @param endInclusive true if the end of the range is inclusive + * @return new TermRangeQuery instance + */ + protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { + final TermRangeQuery query = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, rangeCollator); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Builds a new MatchAllDocsQuery instance + * @return new MatchAllDocsQuery instance + */ + protected Query newMatchAllDocsQuery() { + return new MatchAllDocsQuery(); + } + + /** + * Builds a new WildcardQuery instance + * @param t wildcard term + * @return new WildcardQuery instance + */ + protected Query newWildcardQuery(Term t) { + WildcardQuery query = new WildcardQuery(t); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances + * to join. + * + * @return Resulting {@link org.apache.lucene.search.Query} object. + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(List clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link org.apache.lucene.search.Query} object. + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(List clauses, boolean disableCoord) + throws ParseException + { + if (clauses.size()==0) { + return null; // all clause words were filtered away by the analyzer. + } + BooleanQuery query = newBooleanQuery(disableCoord); + for(final BooleanClause clause: clauses) { + query.add(clause); + } + return query; + } + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *

      + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *

      + * Can be overridden by extending classes, to provide custom handling for + * wildcard queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? or *), but is not simple prefix term + * + * @return Resulting {@link org.apache.lucene.search.Query} built for the term + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getWildcardQuery(String field, String termStr) throws ParseException + { + if ("*".equals(field)) { + if ("*".equals(termStr)) return newMatchAllDocsQuery(); + } + if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) + throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newWildcardQuery(t); + } + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains a regular expression + * query. + *
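Because these handler methods are protected, subclasses can intercept them; a hedged sketch that disallows wildcard queries entirely (the same pattern applies to getRegexpQuery and getPrefixQuery below):

  QueryParser strict = new QueryParser(Version.LUCENE_31, "contents", analyzer) {
    @Override
    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
      // "throw in overridden method to disallow", as the javadoc above puts it
      throw new ParseException("wildcard queries are disabled");
    }
  };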

      + * Depending on settings, pattern term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with regular expression templates. + *

      + * Can be overridden by extending classes, to provide custom handling for + * regular expression queries, which may be necessary due to missing analyzer + * calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains a regular expression + * + * @return Resulting {@link org.apache.lucene.search.Query} built for the term + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newRegexpQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wildcard + * character as its last character. Since this is a special case + * of generic wildcard term, and such a query can be optimized easily, + * this usually results in a different query object. + *

      + * Depending on settings, a prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *

      + * Can be overridden by extending classes, to provide custom handling for + * wild card queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * (without trailing '*' character!) + * + * @return Resulting {@link org.apache.lucene.search.Query} built for the term + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (!allowLeadingWildcard && termStr.startsWith("*")) + throw new ParseException("'*' not allowed as first character in PrefixQuery"); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newPrefixQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting {@link org.apache.lucene.search.Query} built for the term + * @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow + */ + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + } + + + // extracted from the .jj grammar + Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean prefix, boolean wildcard, boolean fuzzy, boolean regexp) throws ParseException { + Query q; + + String termImage=discardEscapeChar(term.image); + if (wildcard) { + q = getWildcardQuery(qfield, term.image); + } else if (prefix) { + q = getPrefixQuery(qfield, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); + } else if (regexp) { + q = getRegexpQuery(qfield, term.image.substring(1, term.image.length()-1)); + } else if (fuzzy) { + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + if(fms < 0.0f){ + throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); + } else if (fms >= 1.0f && fms != (int) fms) { + throw new ParseException("Fractional edit distances are not allowed!"); + } + q = getFuzzyQuery(qfield, termImage, fms); + } else { + q = getFieldQuery(qfield, termImage, false); + } + return q; + } + + // extracted from the .jj grammar + Query handleQuotedTerm(String qfield, Token term, Token fuzzySlop) throws ParseException { + int s = phraseSlop; // default + if (fuzzySlop != null) { + try { + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); + } + catch (Exception ignored) { } + } + return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s); + } + + // extracted from the .jj grammar + Query handleBoost(Query q, Token boost) throws ParseException { + if (boost != null) { + float f = (float) 1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + } + catch (Exception ignored) { + /* Should this be handled somehow? 
(defaults to "no boost", if + * boost number is invalid) + */ + } + + // avoid boosting null queries, such as those caused by stop words + if (q != null) { + q.setBoost(f); + } + } + return q; + } + + + + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + * + * Supports escaped unicode characters, e. g. translates + * \\u0041 to A. + * + */ + String discardEscapeChar(String input) throws ParseException { + // Create char array to hold unescaped char sequence + char[] output = new char[input.length()]; + + // The length of the output can be less than the input + // due to discarded escape chars. This variable holds + // the actual length of the output + int length = 0; + + // We remember whether the last processed character was + // an escape character + boolean lastCharWasEscapeChar = false; + + // The multiplier the current unicode digit must be multiplied with. + // E. g. the first digit must be multiplied with 16^3, the second with 16^2... + int codePointMultiplier = 0; + + // Used to calculate the codepoint of the escaped unicode character + int codePoint = 0; + + for (int i = 0; i < input.length(); i++) { + char curChar = input.charAt(i); + if (codePointMultiplier > 0) { + codePoint += hexToInt(curChar) * codePointMultiplier; + codePointMultiplier >>>= 4; + if (codePointMultiplier == 0) { + output[length++] = (char)codePoint; + codePoint = 0; + } + } else if (lastCharWasEscapeChar) { + if (curChar == 'u') { + // found an escaped unicode character + codePointMultiplier = 16 * 16 * 16; + } else { + // this character was escaped + output[length] = curChar; + length++; + } + lastCharWasEscapeChar = false; + } else { + if (curChar == '\\') { + lastCharWasEscapeChar = true; + } else { + output[length] = curChar; + length++; + } + } + } + + if (codePointMultiplier > 0) { + throw new ParseException("Truncated unicode escape sequence."); + } + + if (lastCharWasEscapeChar) { + throw new ParseException("Term can not end with escape character."); + } + + return new String(output, 0, length); + } + + /** Returns the numeric value of the hexadecimal character */ + static final int hexToInt(char c) throws ParseException { + if ('0' <= c && c <= '9') { + return c - '0'; + } else if ('a' <= c && c <= 'f'){ + return c - 'a' + 10; + } else if ('A' <= c && c <= 'F') { + return c - 'A' + 10; + } else { + throw new ParseException("None-hex character in unicode escape sequence: " + c); + } + } + + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped by a preceding \. + */ + public static String escape(String s) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // These characters are part of the query syntax and must be escaped + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?' 
|| c == '|' || c == '&') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + +} diff --git a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java index c70db936287..4df94c4486e 100644 --- a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java +++ b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java @@ -18,15 +18,15 @@ package org.apache.lucene.search; */ import java.io.IOException; +import java.io.Serializable; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.BasicOperations; import org.apache.lucene.util.automaton.MinimizationOperations; @@ -56,9 +56,16 @@ public class AutomatonQuery extends MultiTermQuery { /** term containing the field, and possibly some pattern structure */ protected final Term term; - transient ByteRunAutomaton runAutomaton; - transient boolean isFinite; - transient BytesRef commonSuffixRef; + /** + * abstraction for returning a termsenum: + * in the ctor the query computes one of these, the actual + * implementation depends upon the automaton's structure. + */ + private abstract class TermsEnumFactory implements Serializable { + protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException; + } + + private final TermsEnumFactory factory; /** * Create a new AutomatonQuery from an {@link Automaton}. @@ -68,60 +75,77 @@ public class AutomatonQuery extends MultiTermQuery { * @param automaton Automaton to run, terms that are accepted are considered a * match. */ - public AutomatonQuery(Term term, Automaton automaton) { + public AutomatonQuery(final Term term, Automaton automaton) { super(term.field()); this.term = term; this.automaton = automaton; MinimizationOperations.minimize(automaton); - } - - private synchronized void compileAutomaton() { - // this method must be synchronized, as setting the three transient fields is not atomic: - if (runAutomaton == null) { - runAutomaton = new ByteRunAutomaton(automaton); - isFinite = SpecialOperations.isFinite(automaton); - commonSuffixRef = isFinite ? 
null : SpecialOperations.getCommonSuffixBytesRef(runAutomaton.getAutomaton()); + + if (BasicOperations.isEmpty(automaton)) { + // matches nothing + factory = new TermsEnumFactory() { + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + return TermsEnum.EMPTY; + } + }; + } else if (BasicOperations.isTotal(automaton)) { + // matches all possible strings + factory = new TermsEnumFactory() { + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + return terms.iterator(); + } + }; + } else { + final String singleton; + final String commonPrefix; + + if (automaton.getSingleton() == null) { + commonPrefix = SpecialOperations.getCommonPrefix(automaton); + if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) { + singleton = commonPrefix; + } else { + singleton = null; + } + } else { + commonPrefix = null; + singleton = automaton.getSingleton(); + } + + if (singleton != null) { + // matches a fixed string in singleton or expanded representation + factory = new TermsEnumFactory() { + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + return new SingleTermsEnum(terms.iterator(), term.createTerm(singleton)); + } + }; + } else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate( + BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) { + // matches a constant prefix + factory = new TermsEnumFactory() { + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + return new PrefixTermsEnum(terms.iterator(), term.createTerm(commonPrefix)); + } + }; + } else { + final AutomatonTermsEnum.CompiledAutomaton compiled = + new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton)); + factory = new TermsEnumFactory() { + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + return new AutomatonTermsEnum(terms.iterator(), compiled); + } + }; + } } } @Override protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { - // matches nothing - if (BasicOperations.isEmpty(automaton)) { - return TermsEnum.EMPTY; - } - - TermsEnum tenum = terms.iterator(); - - // matches all possible strings - if (BasicOperations.isTotal(automaton)) { - return tenum; - } - - // matches a fixed string in singleton representation - String singleton = automaton.getSingleton(); - if (singleton != null) - return new SingleTermsEnum(tenum, term.createTerm(singleton)); - - // matches a fixed string in expanded representation - final String commonPrefix = SpecialOperations.getCommonPrefix(automaton); - - if (commonPrefix.length() > 0) { - if (BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) { - return new SingleTermsEnum(tenum, term.createTerm(commonPrefix)); - } - - // matches a constant prefix - Automaton prefixAutomaton = BasicOperations.concatenate(BasicAutomata - .makeString(commonPrefix), BasicAutomata.makeAnyString()); - if (BasicOperations.sameLanguage(automaton, prefixAutomaton)) { - return new PrefixTermsEnum(tenum, term.createTerm(commonPrefix)); - } - } - - compileAutomaton(); - - return new AutomatonTermsEnum(runAutomaton, tenum, isFinite, commonSuffixRef); + return factory.getTermsEnum(terms, atts); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/AutomatonTermsEnum.java 
b/lucene/src/java/org/apache/lucene/search/AutomatonTermsEnum.java index 401c6133601..58cb5dd851b 100644 --- a/lucene/src/java/org/apache/lucene/search/AutomatonTermsEnum.java +++ b/lucene/src/java/org/apache/lucene/search/AutomatonTermsEnum.java @@ -27,6 +27,7 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.apache.lucene.util.automaton.SpecialOperations; import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.automaton.UTF32ToUTF8; /** * A FilteredTermsEnum that enumerates terms based upon what is accepted by a @@ -46,8 +47,6 @@ import org.apache.lucene.util.automaton.Transition; * @lucene.experimental */ public class AutomatonTermsEnum extends FilteredTermsEnum { - // the object-oriented form of the DFA - private final Automaton automaton; // a tableized array-based form of the DFA private final ByteRunAutomaton runAutomaton; // common suffix of the automaton @@ -71,54 +70,26 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { private final Comparator termComp; /** - * Expert ctor: * Construct an enumerator based upon an automaton, enumerating the specified * field, working on a supplied TermsEnum *
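For orientation, a hedged sketch of how the reworked AutomatonQuery above is typically built (RegExp pattern and field are illustrative); the factory chosen in its constructor then picks the cheapest enumeration:

  Automaton a = new RegExp("lu.*ne").toAutomaton();
  Query q = new AutomatonQuery(new Term("contents", "lu.*ne"), a);
  // empty automaton     -> TermsEnum.EMPTY
  // accepts all strings -> terms.iterator()
  // single fixed term   -> SingleTermsEnum; constant prefix -> PrefixTermsEnum
  // anything else       -> AutomatonTermsEnum over a CompiledAutomaton (UTF-32 converted to UTF-8)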

      * @lucene.experimental *

      - * @param runAutomaton pre-compiled ByteRunAutomaton - * @param finite true if the automaton accepts a finite language + * @param compiled CompiledAutomaton */ - public AutomatonTermsEnum(ByteRunAutomaton runAutomaton, - TermsEnum tenum, - boolean finite, BytesRef commonSuffixRef) - throws IOException { + public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) throws IOException { super(tenum); - this.automaton = runAutomaton.getAutomaton(); - this.finite = finite; + this.finite = compiled.finite; + this.runAutomaton = compiled.runAutomaton; + this.commonSuffixRef = compiled.commonSuffixRef; + this.allTransitions = compiled.sortedTransitions; - this.runAutomaton = runAutomaton; - if (finite) { - // don't use suffix w/ finite DFAs - this.commonSuffixRef = null; - } else if (commonSuffixRef == null) { - // compute now - this.commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(automaton); - } else { - // precomputed - this.commonSuffixRef = commonSuffixRef; - } - - // build a cache of sorted transitions for every state - allTransitions = this.automaton.getSortedTransitions(); // used for path tracking, where each bit is a numbered state. visited = new long[runAutomaton.getSize()]; termComp = getComparator(); } - /** - * Construct an enumerator based upon an automaton, enumerating the specified - * field, working on a supplied TermsEnum - *

      - * It will automatically calculate whether or not the automaton is finite - */ - public AutomatonTermsEnum(Automaton automaton, TermsEnum tenum) - throws IOException { - this(new ByteRunAutomaton(automaton), tenum, SpecialOperations.isFinite(automaton), null); - } - /** * Returns true if the term matches the automaton. Also stashes away the term * to assist with smart enumeration. @@ -140,9 +111,9 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { @Override protected BytesRef nextSeekTerm(final BytesRef term) throws IOException { if (term == null) { - seekBytesRef.copy(""); + assert seekBytesRef.length == 0; // return the empty term, as its valid - if (runAutomaton.run(seekBytesRef.bytes, seekBytesRef.offset, seekBytesRef.length)) { + if (runAutomaton.isAccept(runAutomaton.getInitialState())) { return seekBytesRef; } } else { @@ -151,25 +122,20 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { // seek to the next possible string; if (nextString()) { - // reposition - - if (linear) - setLinear(infinitePosition); - return seekBytesRef; + return seekBytesRef; // reposition + } else { + return null; // no more possible strings can match } - // no more possible strings can match - return null; } - // this instance prevents unicode conversion during backtracking, - // we can just call setLinear once at the end. - int infinitePosition; - /** * Sets the enum to operate in linear fashion, as we have found - * a looping transition at position + * a looping transition at position: we set an upper bound and + * act like a TermRangeQuery for this portion of the term space. */ private void setLinear(int position) { + assert linear == false; + int state = runAutomaton.getInitialState(); int maxInterval = 0xff; for (int i = 0; i < position; i++) { @@ -193,6 +159,8 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { System.arraycopy(seekBytesRef.bytes, 0, linearUpperBound.bytes, 0, position); linearUpperBound.bytes[position] = (byte) maxInterval; linearUpperBound.length = length; + + linear = true; } private final IntsRef savedStates = new IntsRef(10); @@ -226,8 +194,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { states[pos+1] = nextState; // we found a loop, record it for faster enumeration if (!finite && !linear && visited[nextState] == curGen) { - linear = true; - infinitePosition = pos; + setLinear(pos); } state = nextState; } @@ -313,15 +280,16 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { */ transition = allTransitions[state][0]; state = transition.getDest().getNumber(); - // we found a loop, record it for faster enumeration - if (!finite && !linear && visited[state] == curGen) { - linear = true; - infinitePosition = seekBytesRef.length; - } + // append the minimum transition seekBytesRef.grow(seekBytesRef.length + 1); seekBytesRef.length++; seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin(); + + // we found a loop, record it for faster enumeration + if (!finite && !linear && visited[state] == curGen) { + setLinear(seekBytesRef.length-1); + } } return true; } @@ -350,4 +318,26 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { } return -1; /* all solutions exhausted */ } + + /** + * immutable class with everything this enum needs. 
+ */ + public static class CompiledAutomaton { + public final ByteRunAutomaton runAutomaton; + public final Transition[][] sortedTransitions; + public final BytesRef commonSuffixRef; + public final boolean finite; + + public CompiledAutomaton(Automaton automaton, boolean finite) { + Automaton utf8 = new UTF32ToUTF8().convert(automaton); + runAutomaton = new ByteRunAutomaton(utf8, true); + sortedTransitions = utf8.getSortedTransitions(); + this.finite = finite; + if (finite) { + commonSuffixRef = null; + } else { + commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(utf8); + } + } + } } diff --git a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java index 56f7d098114..8e2240cdea9 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.search.BooleanClause.Occur; @@ -62,46 +63,32 @@ public class BooleanQuery extends Query implements Iterable { } private ArrayList clauses = new ArrayList(); - private boolean disableCoord; + private final boolean disableCoord; /** Constructs an empty boolean query. */ - public BooleanQuery() {} + public BooleanQuery() { + disableCoord = false; + } /** Constructs an empty boolean query. * - * {@link Similarity#coord(int,int)} may be disabled in scoring, as + * {@link SimilarityProvider#coord(int,int)} may be disabled in scoring, as * appropriate. For example, this score factor does not make sense for most * automatically generated queries, like {@link WildcardQuery} and {@link * FuzzyQuery}. * - * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring. + * @param disableCoord disables {@link SimilarityProvider#coord(int,int)} in scoring. */ public BooleanQuery(boolean disableCoord) { this.disableCoord = disableCoord; } - /** Returns true iff {@link Similarity#coord(int,int)} is disabled in + /** Returns true iff {@link SimilarityProvider#coord(int,int)} is disabled in * scoring for this query instance. * @see #BooleanQuery(boolean) */ public boolean isCoordDisabled() { return disableCoord; } - // Implement coord disabling. - // Inherit javadoc. - @Override - public Similarity getSimilarity(IndexSearcher searcher) { - Similarity result = super.getSimilarity(searcher); - if (disableCoord) { // disable coord as requested - result = new SimilarityDelegator(result) { - @Override - public float coord(int overlap, int maxOverlap) { - return 1.0f; - } - }; - } - return result; - } - /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. @@ -175,13 +162,15 @@ public class BooleanQuery extends Query implements Iterable { */ protected class BooleanWeight extends Weight { /** The Similarity implementation. 
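A hedged sketch of the caller-facing side of this change: coord disabling is now fixed at construction time and threaded into BooleanWeight, instead of being patched in through a SimilarityDelegator:

  BooleanQuery bq = new BooleanQuery(true);   // disableCoord for the lifetime of the query
  bq.add(new TermQuery(new Term("contents", "lucene")), BooleanClause.Occur.SHOULD);
  bq.add(new TermQuery(new Term("contents", "search")), BooleanClause.Occur.SHOULD);
  // BooleanWeight receives disableCoord and skips similarityProvider.coord(overlap, maxOverlap)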
*/ - protected Similarity similarity; + protected SimilarityProvider similarityProvider; protected ArrayList weights; protected int maxCoord; // num optional + num required + private final boolean disableCoord; - public BooleanWeight(IndexSearcher searcher) + public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { - this.similarity = getSimilarity(searcher); + this.similarityProvider = searcher.getSimilarityProvider(); + this.disableCoord = disableCoord; weights = new ArrayList(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); @@ -212,6 +201,9 @@ public class BooleanQuery extends Query implements Iterable { return sum ; } + public float coord(int overlap, int maxOverlap) { + return similarityProvider.coord(overlap, maxOverlap); + } @Override public void normalize(float norm) { @@ -223,7 +215,7 @@ public class BooleanQuery extends Query implements Iterable { } @Override - public Explanation explain(IndexReader reader, int doc) + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { final int minShouldMatch = BooleanQuery.this.getMinimumNumberShouldMatch(); @@ -237,7 +229,7 @@ public class BooleanQuery extends Query implements Iterable { for (Iterator wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); - if (w.scorer(reader, true, true) == null) { + if (w.scorer(context, ScorerContext.def().scoreDocsInOrder(true).topScorer(true)) == null) { if (c.isRequired()) { fail = true; Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); @@ -245,7 +237,7 @@ public class BooleanQuery extends Query implements Iterable { } continue; } - Explanation e = w.explain(reader, doc); + Explanation e = w.explain(context, doc); if (e.isMatch()) { if (!c.isProhibited()) { sumExpl.addDetail(e); @@ -284,10 +276,10 @@ public class BooleanQuery extends Query implements Iterable { sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); - float coordFactor = similarity.coord(coord, maxCoord); - if (coordFactor == 1.0f) // coord is no-op + final float coordFactor = disableCoord ? 
1.0f : coord(coord, maxCoord); + if (coordFactor == 1.0f) { return sumExpl; // eliminate wrapper - else { + } else { ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), sum*coordFactor, "product of:"); @@ -299,7 +291,7 @@ public class BooleanQuery extends Query implements Iterable { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { List required = new ArrayList(); List prohibited = new ArrayList(); @@ -307,7 +299,7 @@ public class BooleanQuery extends Query implements Iterable { Iterator cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - Scorer subScorer = w.scorer(reader, true, false); + Scorer subScorer = w.scorer(context, ScorerContext.def()); if (subScorer == null) { if (c.isRequired()) { return null; @@ -322,8 +314,8 @@ public class BooleanQuery extends Query implements Iterable { } // Check if we can return a BooleanScorer - if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) { - return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord); + if (!scorerContext.scoreDocsInOrder && scorerContext.topScorer && required.size() == 0 && prohibited.size() < 32) { + return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord); } if (required.size() == 0 && optional.size() == 0) { @@ -337,7 +329,7 @@ public class BooleanQuery extends Query implements Iterable { } // Return a BooleanScorer2 - return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord); + return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord); } @Override @@ -363,7 +355,7 @@ public class BooleanQuery extends Query implements Iterable { @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new BooleanWeight(searcher); + return new BooleanWeight(searcher, disableCoord); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/src/java/org/apache/lucene/search/BooleanScorer.java index 3a90fe023ce..18978c36006 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer.java @@ -20,8 +20,9 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.List; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery.BooleanWeight; /* Description from Doug Cutting (excerpted from * LUCENE-1483): @@ -92,7 +93,7 @@ final class BooleanScorer extends Scorer { } @Override - public void setNextReader(IndexReader reader, int docBase) { + public void setNextReader(AtomicReaderContext context) { // not needed by this implementation } @@ -118,7 +119,7 @@ final class BooleanScorer extends Scorer { int doc = NO_MORE_DOCS; int freq; - public BucketScorer() { super(null); } + public BucketScorer(Weight weight) { super(weight); } @Override public int advance(int target) throws IOException { return NO_MORE_DOCS; } @@ -197,9 +198,9 @@ final class BooleanScorer extends Scorer { private Bucket current; private int doc = -1; - BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch, + BooleanScorer(BooleanWeight weight, boolean 
disableCoord, int minNrShouldMatch, List optionalScorers, List prohibitedScorers, int maxCoord) throws IOException { - super(similarity, weight); + super(weight); this.minNrShouldMatch = minNrShouldMatch; if (optionalScorers != null && optionalScorers.size() > 0) { @@ -222,18 +223,17 @@ final class BooleanScorer extends Scorer { } coordFactors = new float[optionalScorers.size() + 1]; - Similarity sim = getSimilarity(); for (int i = 0; i < coordFactors.length; i++) { - coordFactors[i] = sim.coord(i, maxCoord); + coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord); } } // firstDocID is ignored since nextDoc() initializes 'current' @Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { boolean more; Bucket tmp; - BucketScorer bs = new BucketScorer(); + BucketScorer bs = new BucketScorer(weight); // The internal loop will set the score and doc before calling collect. collector.setScorer(bs); do { diff --git a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java index c8dcf2eba20..9c8ac60cbf7 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery.BooleanWeight; /* See the description in BooleanScorer.java, comparing * BooleanScorer & BooleanScorer2 */ @@ -42,14 +43,12 @@ class BooleanScorer2 extends Scorer { int maxCoord = 0; // to be increased for each non prohibited scorer int nrMatchers; // to be increased by score() of match counting scorers. - void init() { // use after all scorers have been added. + void init(boolean disableCoord) { // use after all scorers have been added. coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1]; - Similarity sim = getSimilarity(); for (int i = 0; i < coordFactors.length; i++) { - coordFactors[i] = sim.coord(i, maxCoord); + coordFactors[i] = disableCoord ? 1.0f : ((BooleanWeight)weight).coord(i, maxCoord); } } - } private final Coordinator coordinator; @@ -69,8 +68,11 @@ class BooleanScorer2 extends Scorer { * prohibited and optional scorers. In no required scorers are added, at least * one of the optional scorers will have to match during the search. * - * @param similarity - * The similarity to be used. + * @param weight + * The BooleanWeight to be used. + * @param disableCoord + * If this parameter is true, coordination level matching + * ({@link Similarity#coord(int, int)}) is not used. * @param minNrShouldMatch * The minimum number of optional added scorers that should match * during the search. In case no required scorers are added, at least @@ -82,9 +84,9 @@ class BooleanScorer2 extends Scorer { * @param optional * the list of optional scorers. 
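For intuition, a small worked example of the coordFactors table built in Coordinator.init(), assuming a provider like DefaultSimilarity where coord(overlap, maxOverlap) = overlap / (float) maxOverlap:

  // maxCoord == 3 (three non-prohibited scorers):
  //   coordFactors = { coord(0,3), coord(1,3), coord(2,3), coord(3,3) }
  //                = { 0.0f,       0.333f,     0.667f,     1.0f      }
  // with disableCoord == true every entry is 1.0f, so the summed score is used unchanged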
*/ - public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch, + public BooleanScorer2(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch, List required, List prohibited, List optional, int maxCoord) throws IOException { - super(similarity, weight); + super(weight); if (minNrShouldMatch < 0) { throw new IllegalArgumentException("Minimum number of optional scorers should not be negative"); } @@ -96,8 +98,8 @@ class BooleanScorer2 extends Scorer { requiredScorers = required; prohibitedScorers = prohibited; - coordinator.init(); - countingSumScorer = makeCountingSumScorer(); + coordinator.init(disableCoord); + countingSumScorer = makeCountingSumScorer(disableCoord); } /** Count a scorer as a single match. */ @@ -109,7 +111,7 @@ class BooleanScorer2 extends Scorer { private float lastDocScore = Float.NaN; SingleMatchScorer(Scorer scorer) { - super(scorer.getSimilarity()); + super(scorer.weight); this.scorer = scorer; } @@ -145,7 +147,7 @@ class BooleanScorer2 extends Scorer { private Scorer countingDisjunctionSumScorer(final List scorers, int minNrShouldMatch) throws IOException { // each scorer from the list counted as a single matcher - return new DisjunctionSumScorer(scorers, minNrShouldMatch) { + return new DisjunctionSumScorer(weight, scorers, minNrShouldMatch) { private int lastScoredDoc = -1; // Save the score of lastScoredDoc, so that we don't compute it more than // once in score(). @@ -164,12 +166,11 @@ class BooleanScorer2 extends Scorer { }; } - private static final Similarity defaultSimilarity = Similarity.getDefault(); - - private Scorer countingConjunctionSumScorer(List requiredScorers) throws IOException { + private Scorer countingConjunctionSumScorer(boolean disableCoord, + List requiredScorers) throws IOException { // each scorer from the list counted as a single matcher final int requiredNrMatchers = requiredScorers.size(); - return new ConjunctionScorer(defaultSimilarity, requiredScorers) { + return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) { private int lastScoredDoc = -1; // Save the score of lastScoredDoc, so that we don't compute it more than // once in score(). @@ -192,8 +193,9 @@ class BooleanScorer2 extends Scorer { }; } - private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting. - return new ConjunctionScorer(defaultSimilarity, req1, req2); + private Scorer dualConjunctionSumScorer(boolean disableCoord, + Scorer req1, Scorer req2) throws IOException { // non counting. + return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2); // All scorers match, so defaultSimilarity always has 1 as // the coordination factor. // Therefore the sum of the scores of two scorers @@ -203,13 +205,13 @@ class BooleanScorer2 extends Scorer { /** Returns the scorer to be used for match counting and score summing. * Uses requiredScorers, optionalScorers and prohibitedScorers. */ - private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher + private Scorer makeCountingSumScorer(boolean disableCoord) throws IOException { // each scorer counted as a single matcher return (requiredScorers.size() == 0) - ? makeCountingSumScorerNoReq() - : makeCountingSumScorerSomeReq(); + ? 
makeCountingSumScorerNoReq(disableCoord) + : makeCountingSumScorerSomeReq(disableCoord); } - private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers + private Scorer makeCountingSumScorerNoReq(boolean disableCoord) throws IOException { // No required scorers // minNrShouldMatch optional scorers are required, but at least 1 int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; Scorer requiredCountingSumScorer; @@ -217,24 +219,26 @@ class BooleanScorer2 extends Scorer { requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); else if (optionalScorers.size() == 1) requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0)); - else - requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); + else { + requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, optionalScorers); + } return addProhibitedScorers(requiredCountingSumScorer); } - private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer. + private Scorer makeCountingSumScorerSomeReq(boolean disableCoord) throws IOException { // At least one required scorer. if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. ArrayList allReq = new ArrayList(requiredScorers); allReq.addAll(optionalScorers); - return addProhibitedScorers(countingConjunctionSumScorer(allReq)); + return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, allReq)); } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer Scorer requiredCountingSumScorer = requiredScorers.size() == 1 ? new SingleMatchScorer(requiredScorers.get(0)) - : countingConjunctionSumScorer(requiredScorers); + : countingConjunctionSumScorer(disableCoord, requiredScorers); if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers return addProhibitedScorers( dualConjunctionSumScorer( // non counting + disableCoord, requiredCountingSumScorer, countingDisjunctionSumScorer( optionalScorers, @@ -261,7 +265,7 @@ class BooleanScorer2 extends Scorer { : new ReqExclScorer(requiredCountingSumScorer, ((prohibitedScorers.size() == 1) ? prohibitedScorers.get(0) - : new DisjunctionSumScorer(prohibitedScorers))); + : new DisjunctionSumScorer(weight, prohibitedScorers))); } /** Scores and collects all matching documents. @@ -276,7 +280,7 @@ class BooleanScorer2 extends Scorer { } @Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { doc = firstDocID; collector.setScorer(this); while (doc < max) { diff --git a/lucene/src/java/org/apache/lucene/search/BoostAttribute.java b/lucene/src/java/org/apache/lucene/search/BoostAttribute.java index 93a2d3916bf..58f44633b8d 100644 --- a/lucene/src/java/org/apache/lucene/search/BoostAttribute.java +++ b/lucene/src/java/org/apache/lucene/search/BoostAttribute.java @@ -21,13 +21,13 @@ import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; // javadocs only import org.apache.lucene.index.TermsEnum; // javadocs only -/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)} +/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)} * and update the boost on each returned term. 
This enables to control the boost factor * for each matching term in {@link MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE} or * {@link TopTermsRewrite} mode. * {@link FuzzyQuery} is using this to take the edit distance into account. *
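As a rough illustration of that note (not part of the patch), a fuzzy-style TermsEnum registers the attribute on its own AttributeSource and updates it for each accepted term. The names termsEnum, editDistance and maxEdits below are placeholders:

BoostAttribute boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
// Each time the enum accepts a term, e.g. scaled by edit distance:
boostAtt.setBoost(1.0f - ((float) editDistance / (float) maxEdits));
// The rewrite method reads this value when building the rewritten query.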

      Please note: This attribute is intended to be added only by the TermsEnum - * to itsself in its constructor and consumed by the {@link MultiTermQuery.RewriteMethod}. + * to itself in its constructor and consumed by the {@link MultiTermQuery.RewriteMethod}. * @lucene.internal */ public interface BoostAttribute extends Attribute { diff --git a/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java b/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java index 28ce30ee023..f07909021e5 100644 --- a/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java @@ -37,20 +37,6 @@ public final class BoostAttributeImpl extends AttributeImpl implements BoostAttr public void clear() { boost = 1.0f; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof BoostAttributeImpl) - return ((BoostAttributeImpl) other).boost == boost; - return false; - } - - @Override - public int hashCode() { - return Float.floatToIntBits(boost); - } @Override public void copyTo(AttributeImpl target) { diff --git a/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java b/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java index d19c872ee58..e1341fe493a 100644 --- a/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java +++ b/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.Bits; import java.io.IOException; @@ -60,15 +61,16 @@ public class CachingSpanFilter extends SpanFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - SpanFilterResult result = getCachedResult(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + SpanFilterResult result = getCachedResult(context); return result != null ? result.getDocIdSet() : null; } // for testing int hitCount, missCount; - private SpanFilterResult getCachedResult(IndexReader reader) throws IOException { + private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException { + final IndexReader reader = context.reader; final Object coreKey = reader.getCoreCacheKey(); final Object delCoreKey = reader.hasDeletions() ? 
reader.getDeletedDocs() : coreKey; @@ -80,7 +82,7 @@ public class CachingSpanFilter extends SpanFilter { } missCount++; - result = filter.bitSpans(reader); + result = filter.bitSpans(context); cache.put(coreKey, delCoreKey, result); return result; @@ -88,8 +90,8 @@ public class CachingSpanFilter extends SpanFilter { @Override - public SpanFilterResult bitSpans(IndexReader reader) throws IOException { - return getCachedResult(reader); + public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { + return getCachedResult(context); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java index d51eed25172..1f865670b56 100644 --- a/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java +++ b/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.WeakHashMap; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.OpenBitSetDISI; import org.apache.lucene.util.Bits; @@ -37,6 +38,9 @@ import org.apache.lucene.util.Bits; * {@link DeletesMode#DYNAMIC}). */ public class CachingWrapperFilter extends Filter { + // TODO: make this filter aware of ReaderContext. a cached filter could + // specify the actual readers key or something similar to indicate on which + // level of the readers hierarchy it should be cached. Filter filter; /** @@ -191,8 +195,8 @@ public class CachingWrapperFilter extends Filter { int hitCount, missCount; @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + final IndexReader reader = context.reader; final Object coreKey = reader.getCoreCacheKey(); final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey; @@ -205,7 +209,7 @@ public class CachingWrapperFilter extends Filter { missCount++; // cache miss - docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader); + docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader); if (docIdSet != null) { cache.put(coreKey, delCoreKey, docIdSet); diff --git a/lucene/src/java/org/apache/lucene/search/Collector.java b/lucene/src/java/org/apache/lucene/search/Collector.java index e2514887930..b64abce0f4b 100644 --- a/lucene/src/java/org/apache/lucene/search/Collector.java +++ b/lucene/src/java/org/apache/lucene/search/Collector.java @@ -19,7 +19,8 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; /** *

      Expert: Collectors are primarily meant to be used to @@ -98,8 +99,8 @@ import org.apache.lucene.index.IndexReader; * bits.set(doc + docBase); * } * - * public void setNextReader(IndexReader reader, int docBase) { - * this.docBase = docBase; + * public void setNextReader(AtomicReaderContext context) { + * this.docBase = context.docBase; * } * }); *
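To make the signature change concrete, here is a small self-contained variant of the collector from the snippet above, written against the new per-segment API (illustrative only; OpenBitSet is used as in the original example):

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.OpenBitSet;

final class BitSetCollector extends Collector {
  private final OpenBitSet bits = new OpenBitSet();
  private int docBase;

  @Override
  public void setScorer(Scorer scorer) {
    // scores are not needed for this collector
  }

  @Override
  public void collect(int doc) {
    bits.set(doc + docBase);        // re-base the segment-local doc id
  }

  @Override
  public void setNextReader(AtomicReaderContext context) {
    docBase = context.docBase;      // docBase now comes from the context
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return true;                    // order does not matter when filling a bit set
  }
}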

    @@ -136,24 +137,23 @@ public abstract class Collector { * *

    * Note: This is called in an inner search loop. For good search performance, - * implementations of this method should not call {@link Searcher#doc(int)} or + * implementations of this method should not call {@link IndexSearcher#doc(int)} or * {@link org.apache.lucene.index.IndexReader#document(int)} on every hit. * Doing so can slow searches by an order of magnitude or more. */ public abstract void collect(int doc) throws IOException; /** - * Called before collecting from each IndexReader. All doc ids in - * {@link #collect(int)} will correspond to reader. + * Called before collecting from each {@link AtomicReaderContext}. All doc ids in + * {@link #collect(int)} will correspond to {@link ReaderContext#reader}. * - * Add docBase to the current IndexReaders internal document id to re-base ids - * in {@link #collect(int)}. + * Add {@link AtomicReaderContext#docBase} to the current {@link ReaderContext#reader}'s + * internal document id to re-base ids in {@link #collect(int)}. * - * @param reader - * next IndexReader - * @param docBase + * @param context + * next atomic reader context */ - public abstract void setNextReader(IndexReader reader, int docBase) throws IOException; + public abstract void setNextReader(AtomicReaderContext context) throws IOException; /** * Return true if this collector does not diff --git a/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java index dd254755a31..b8dea2565b9 100644 --- a/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -29,14 +29,14 @@ class ConjunctionScorer extends Scorer { private final float coord; private int lastDoc = -1; - public ConjunctionScorer(Similarity similarity, Collection scorers) throws IOException { - this(similarity, scorers.toArray(new Scorer[scorers.size()])); + public ConjunctionScorer(Weight weight, float coord, Collection scorers) throws IOException { + this(weight, coord, scorers.toArray(new Scorer[scorers.size()])); } - public ConjunctionScorer(Similarity similarity, Scorer... scorers) throws IOException { - super(similarity); + public ConjunctionScorer(Weight weight, float coord, Scorer... 
scorers) throws IOException { + super(weight); this.scorers = scorers; - coord = similarity.coord(scorers.length, scorers.length); + this.coord = coord; for (int i = 0; i < scorers.length; i++) { if (scorers[i].nextDoc() == NO_MORE_DOCS) { diff --git a/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java b/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java index 6dd17bf3645..64aef2b3cf5 100644 --- a/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java +++ b/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java @@ -21,9 +21,15 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -71,8 +77,8 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { - topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) { + topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD); } @Override @@ -98,9 +104,10 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { final BytesRefHash pendingTerms = col.pendingTerms; final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); for(int i = 0; i < size; i++) { + final int pos = sort[i]; // docFreq is not used for constant score here, we pass 1 // to explicitely set a fake value, so it's not calculated - addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); + addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); } // Strip scores final Query result = new ConstantScoreQuery(bq); @@ -123,12 +130,21 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { @Override public boolean collect(BytesRef bytes) throws IOException { - pendingTerms.add(bytes); + int pos = pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; } + + final TermState termState = termsEnum.termState(); + assert termState != null; + if (pos < 0) { + pos = (-pos)-1; + array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq()); + } else { + array.termState[pos] = new PerReaderTermState(topReaderContext, termState, readerContext.ord, termsEnum.docFreq()); + } return true; } @@ -137,7 +153,8 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { TermsEnum termsEnum; final int docCountCutoff, termCountLimit; - final BytesRefHash pendingTerms = new BytesRefHash(); + final TermStateByteStart array = new TermStateByteStart(16); + final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); } @Override @@ -166,4 +183,40 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { 
return true; } + + /** Special implementation of BytesStartArray that keeps parallel arrays for {@link PerReaderTermState} */ + static final class TermStateByteStart extends DirectBytesStartArray { + PerReaderTermState[] termState; + + public TermStateByteStart(int initSize) { + super(initSize); + } + + @Override + public int[] init() { + final int[] ord = super.init(); + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] grow() { + final int[] ord = super.grow(); + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] clear() { + termState = null; + return super.clear(); + } + + } } diff --git a/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java index fe76121d3c2..d5f5f50389b 100644 --- a/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; @@ -96,12 +97,10 @@ public class ConstantScoreQuery extends Query { protected class ConstantWeight extends Weight { private final Weight innerWeight; - private final Similarity similarity; private float queryNorm; private float queryWeight; public ConstantWeight(IndexSearcher searcher) throws IOException { - this.similarity = getSimilarity(searcher); this.innerWeight = (query == null) ? 
null : query.createWeight(searcher); } @@ -132,22 +131,22 @@ public class ConstantScoreQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; - final DocIdSet dis = filter.getDocIdSet(reader); + final DocIdSet dis = filter.getDocIdSet(context); if (dis == null) return null; disi = dis.iterator(); } else { assert query != null && innerWeight != null; disi = - innerWeight.scorer(reader, scoreDocsInOrder, topScorer); + innerWeight.scorer(context, scorerContext); } if (disi == null) return null; - return new ConstantScorer(similarity, disi, this); + return new ConstantScorer(disi, this); } @Override @@ -156,8 +155,8 @@ public class ConstantScoreQuery extends Query { } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - final Scorer cs = scorer(reader, true, false); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + final Scorer cs = scorer(context, ScorerContext.def()); final boolean exists = (cs != null && cs.advance(doc) == doc); final ComplexExplanation result = new ComplexExplanation(); @@ -180,8 +179,8 @@ public class ConstantScoreQuery extends Query { final DocIdSetIterator docIdSetIterator; final float theScore; - public ConstantScorer(Similarity similarity, DocIdSetIterator docIdSetIterator, Weight w) throws IOException { - super(similarity,w); + public ConstantScorer(DocIdSetIterator docIdSetIterator, Weight w) throws IOException { + super(w); theScore = w.getValue(); this.docIdSetIterator = docIdSetIterator; } @@ -211,8 +210,7 @@ public class ConstantScoreQuery extends Query { @Override public void setScorer(Scorer scorer) throws IOException { // we must wrap again here, but using the scorer passed in as parameter: - collector.setScorer(new ConstantScorer(ConstantScorer.this.getSimilarity(), - scorer, ConstantScorer.this.weight)); + collector.setScorer(new ConstantScorer(scorer, ConstantScorer.this.weight)); } @Override @@ -221,8 +219,8 @@ public class ConstantScoreQuery extends Query { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - collector.setNextReader(reader, docBase); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); } @Override @@ -243,10 +241,8 @@ public class ConstantScoreQuery extends Query { } // this optimization allows out of order scoring as top scorer, - // TODO: theoretically this method should not be called because its protected and - // this class does not use it, it should be public in Scorer! 
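The TODO removed above is what this patch resolves: score(Collector, int, int) becomes public on Scorer so top-level scorers can be driven directly. A hedged sketch of the contract, mirroring the BooleanScorer2 implementation elsewhere in this patch (the method lives inside a Scorer subclass; nextDoc() stands in for whatever sub-scorer the real class advances):

public boolean score(Collector collector, int max, int firstDocID) throws IOException {
  // Collect every hit in [firstDocID, max) and report whether more hits remain.
  collector.setScorer(this);
  int doc = firstDocID;
  while (doc < max) {
    collector.collect(doc);
    doc = nextDoc();
  }
  return doc != NO_MORE_DOCS;
}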
@Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { if (docIdSetIterator instanceof Scorer) { return ((Scorer) docIdSetIterator).score(wrapCollector(collector), max, firstDocID); } else { diff --git a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java index 0ab551ad8b2..71c8a229089 100644 --- a/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java @@ -20,7 +20,7 @@ import org.apache.lucene.index.FieldInvertState; */ /** Expert: Default scoring implementation. */ -public class DefaultSimilarity extends Similarity { +public class DefaultSimilarity extends Similarity implements SimilarityProvider { /** Implemented as * state.getBoost()*lengthNorm(numTerms), where @@ -37,17 +37,10 @@ public class DefaultSimilarity extends Similarity { numTerms = state.getLength() - state.getNumOverlap(); else numTerms = state.getLength(); - return (state.getBoost() * lengthNorm(field, numTerms)); - } - - /** Implemented as 1/sqrt(numTerms). */ - @Override - public float lengthNorm(String fieldName, int numTerms) { - return (float)(1.0 / Math.sqrt(numTerms)); + return state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms))); } /** Implemented as 1/sqrt(sumOfSquaredWeights). */ - @Override public float queryNorm(float sumOfSquaredWeights) { return (float)(1.0 / Math.sqrt(sumOfSquaredWeights)); } @@ -71,7 +64,6 @@ public class DefaultSimilarity extends Similarity { } /** Implemented as overlap / maxOverlap. */ - @Override public float coord(int overlap, int maxOverlap) { return overlap / (float)maxOverlap; } @@ -96,4 +88,12 @@ public class DefaultSimilarity extends Similarity { public boolean getDiscountOverlaps() { return discountOverlaps; } + + /** + * Returns this default implementation for all fields. + * Override this method to customize scoring on a per-field basis. + */ + public Similarity get(String field) { + return this; + } } diff --git a/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index b6cd0295247..0434232035e 100644 --- a/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -23,6 +23,7 @@ import java.util.Iterator; import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; /** @@ -94,29 +95,26 @@ public class DisjunctionMaxQuery extends Query implements Iterable { * change suddenly in the next release.
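One consequence of the DefaultSimilarity hunk above is that per-field scoring can now hang off get(String). A hypothetical subclass, with the field name and the tf tweak invented purely for the example:

final class PerFieldSimilarity extends DefaultSimilarity {
  private final Similarity titleSim = new DefaultSimilarity() {
    @Override
    public float tf(float freq) {
      return freq > 0 ? 1.0f : 0.0f;   // binary term frequency for the "title" field
    }
  };

  @Override
  public Similarity get(String field) {
    // Fall back to this default implementation for every other field.
    return "title".equals(field) ? titleSim : this;
  }
}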

    */ protected class DisjunctionMaxWeight extends Weight { - /** The Similarity implementation. */ - protected Similarity similarity; /** The Weights for our subqueries, in 1-1 correspondence with disjuncts */ protected ArrayList weights = new ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts - /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ + /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ public DisjunctionMaxWeight(IndexSearcher searcher) throws IOException { - this.similarity = searcher.getSimilarity(); for (Query disjunctQuery : disjuncts) { weights.add(disjunctQuery.createWeight(searcher)); } } - /* Return our associated DisjunctionMaxQuery */ + /** Return our associated DisjunctionMaxQuery */ @Override public Query getQuery() { return DisjunctionMaxQuery.this; } - /* Return our boost */ + /** Return our boost */ @Override public float getValue() { return getBoost(); } - /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ + /** Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ @Override public float sumOfSquaredWeights() throws IOException { float max = 0.0f, sum = 0.0f; @@ -130,7 +128,7 @@ public class DisjunctionMaxQuery extends Query implements Iterable { return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * boost * boost; } - /* Apply the computed normalization factor to our subqueries */ + /** Apply the computed normalization factor to our subqueries */ @Override public void normalize(float norm) { norm *= getBoost(); // Incorporate our boost @@ -139,32 +137,31 @@ public class DisjunctionMaxQuery extends Query implements Iterable { } } - /* Create the scorer used to score our associated DisjunctionMaxQuery */ + /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, - boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { Scorer[] scorers = new Scorer[weights.size()]; int idx = 0; for (Weight w : weights) { - Scorer subScorer = w.scorer(reader, true, false); + Scorer subScorer = w.scorer(context, ScorerContext.def()); if (subScorer != null && subScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { scorers[idx++] = subScorer; } } if (idx == 0) return null; // all scorers did not have documents - DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity, scorers, idx); + DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers, idx); return result; } - /* Explain the score we computed for doc */ + /** Explain the score we computed for doc */ @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - if (disjuncts.size() == 1) return weights.get(0).explain(reader,doc); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + if (disjuncts.size() == 1) return weights.get(0).explain(context,doc); ComplexExplanation result = new ComplexExplanation(); float max = 0.0f, sum = 0.0f; result.setDescription(tieBreakerMultiplier == 0.0f ? 
"max of:" : "max plus " + tieBreakerMultiplier + " times others of:"); for (Weight wt : weights) { - Explanation e = wt.explain(reader, doc); + Explanation e = wt.explain(context, doc); if (e.isMatch()) { result.setMatch(Boolean.TRUE); result.addDetail(e); @@ -178,7 +175,7 @@ public class DisjunctionMaxQuery extends Query implements Iterable { } // end of DisjunctionMaxWeight inner class - /* Create the Weight used to score us */ + /** Create the Weight used to score us */ @Override public Weight createWeight(IndexSearcher searcher) throws IOException { return new DisjunctionMaxWeight(searcher); diff --git a/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index d6f5d2a8a5c..9995062c2fe 100644 --- a/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -40,22 +40,20 @@ class DisjunctionMaxScorer extends Scorer { /** * Creates a new instance of DisjunctionMaxScorer * + * @param weight + * The Weight to be used. * @param tieBreakerMultiplier * Multiplier applied to non-maximum-scoring subqueries for a * document as they are summed into the result. - * @param similarity - * -- not used since our definition involves neither coord nor terms - * directly * @param subScorers * The sub scorers this Scorer should iterate on * @param numScorers * The actual number of scorers to iterate on. Note that the array's * length may be larger than the actual number of scorers. */ - public DisjunctionMaxScorer(float tieBreakerMultiplier, - Similarity similarity, Scorer[] subScorers, int numScorers) throws IOException { - super(similarity); - + public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, + Scorer[] subScorers, int numScorers) throws IOException { + super(weight); this.tieBreakerMultiplier = tieBreakerMultiplier; // The passed subScorers array includes only scorers which have documents // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already diff --git a/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index 7e5016d902b..2f7fa5daf33 100644 --- a/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -58,6 +58,7 @@ class DisjunctionSumScorer extends Scorer { private float currentScore = Float.NaN; /** Construct a DisjunctionScorer. + * @param weight The weight to be used. * @param subScorers A collection of at least two subscorers. * @param minimumNrMatchers The positive minimum number of subscorers that should * match to match this query. @@ -67,8 +68,8 @@ class DisjunctionSumScorer extends Scorer { *
    When minimumNrMatchers equals the number of subScorers, * it more efficient to use ConjunctionScorer. */ - public DisjunctionSumScorer( List subScorers, int minimumNrMatchers) throws IOException { - super(null); + public DisjunctionSumScorer(Weight weight, List subScorers, int minimumNrMatchers) throws IOException { + super(weight); nrScorers = subScorers.size(); @@ -88,8 +89,8 @@ class DisjunctionSumScorer extends Scorer { /** Construct a DisjunctionScorer, using one as the minimum number * of matching subscorers. */ - public DisjunctionSumScorer(List subScorers) throws IOException { - this(subScorers, 1); + public DisjunctionSumScorer(Weight weight, List subScorers) throws IOException { + this(weight, subScorers, 1); } /** Called the first time nextDoc() or advance() is called to @@ -123,7 +124,7 @@ class DisjunctionSumScorer extends Scorer { * @return true if more matching documents may remain. */ @Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { // firstDocID is ignored since nextDoc() sets 'currentDoc' collector.setScorer(this); while (currentDoc < max) { diff --git a/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java b/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java index f10d04c0d48..39a73345f9b 100644 --- a/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java +++ b/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java @@ -78,10 +78,10 @@ public abstract class DocIdSetIterator { * * Some implementations are considerably more efficient than that. *
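For reference, the baseline behaviour that sentence alludes to is the simple scan-forward loop sketched below (not patch content); the reworked NOTE that follows additionally allows an implementation to stay put when target is not beyond the current docID():

public int advance(int target) throws IOException {
  int doc = docID();
  // Scan forward until we are at or past target; nextDoc() eventually returns
  // NO_MORE_DOCS, which terminates the loop for any target.
  while (doc < target) {
    doc = nextDoc();
  }
  return doc;
}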

- * NOTE: certain implementations may return a different value (each - * time) if called several times in a row with the same target. + * NOTE: when target ≤ current, implementations may opt + * not to advance beyond their current {@link #docID()}. *

    - * NOTE: this method may be called with {@value #NO_MORE_DOCS} for + * NOTE: this method may be called with {@link #NO_MORE_DOCS} for * efficiency by some Scorers. If your implementation cannot efficiently * determine that it should exhaust, it is recommended that you check for that * value in each call to this method. diff --git a/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java index f2c94a7ae6f..153821d92d0 100644 --- a/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -60,9 +60,12 @@ final class ExactPhraseScorer extends Scorer { private int docID = -1; private int freq; + private final Similarity similarity; + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity similarity, byte[] norms) throws IOException { - super(similarity, weight); + super(weight); + this.similarity = similarity; this.norms = norms; this.value = weight.getValue(); @@ -87,7 +90,7 @@ final class ExactPhraseScorer extends Scorer { } for (int i = 0; i < SCORE_CACHE_SIZE; i++) { - scoreCache[i] = getSimilarity().tf((float) i) * value; + scoreCache[i] = similarity.tf((float) i) * value; } } @@ -207,9 +210,9 @@ final class ExactPhraseScorer extends Scorer { if (freq < SCORE_CACHE_SIZE) { raw = scoreCache[freq]; } else { - raw = getSimilarity().tf((float) freq) * value; + raw = similarity.tf((float) freq) * value; } - return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[docID]); // normalize + return norms == null ? raw : raw * similarity.decodeNormValue(norms[docID]); // normalize } private int phraseFreq() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java index b583dc6fe78..971d7459840 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -137,6 +137,13 @@ public class FieldCacheImpl implements FieldCache { // Made Public so that public Object getValue() { return value; } } + final static IndexReader.ReaderFinishedListener purgeReader = new IndexReader.ReaderFinishedListener() { + // @Override -- not until Java 1.6 + public void finished(IndexReader reader) { + FieldCache.DEFAULT.purge(reader); + } + }; + /** Expert: Internal cache. 
*/ final static class Cache { Cache() { @@ -171,8 +178,10 @@ public class FieldCacheImpl implements FieldCache { // Made Public so that synchronized (readerCache) { innerCache = readerCache.get(readerKey); if (innerCache == null) { + // First time this reader is using FieldCache innerCache = new HashMap,Object>(); readerCache.put(readerKey, innerCache); + reader.addReaderFinishedListener(purgeReader); value = null; } else { value = innerCache.get(key); diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java index 6c4245a5d70..9293e509608 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Bits; @@ -73,7 +74,7 @@ public abstract class FieldCacheRangeFilter extends Filter { /** This method is implemented for each data type */ @Override - public abstract DocIdSet getDocIdSet(IndexReader reader) throws IOException; + public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException; /** * Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all @@ -83,8 +84,8 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, field); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, field); final BytesRef spare = new BytesRef(); final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare); final int upperPoint = fcsi.binarySearchLookup(upperVal == null ? 
null : new BytesRef(upperVal), spare); @@ -124,7 +125,7 @@ public abstract class FieldCacheRangeFilter extends Filter { // for this DocIdSet, we can ignore deleted docs // because deleted docs have an order of 0 (null entry in StringIndex) - return new FieldCacheDocIdSet(reader, true) { + return new FieldCacheDocIdSet(context.reader, true) { @Override final boolean matchDoc(int doc) { final int docOrd = fcsi.getOrd(doc); @@ -152,7 +153,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newByteRange(String field, FieldCache.ByteParser parser, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { final byte inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { final byte i = lowerVal.byteValue(); @@ -174,9 +175,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final byte[] values = FieldCache.DEFAULT.getBytes(reader, field, (FieldCache.ByteParser) parser); + final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser); // we only respect deleted docs if the range contains 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -203,7 +204,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newShortRange(String field, FieldCache.ShortParser parser, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { final short inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { short i = lowerVal.shortValue(); @@ -225,9 +226,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final short[] values = FieldCache.DEFAULT.getShorts(reader, field, (FieldCache.ShortParser) parser); + final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -254,7 +255,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newIntRange(String field, FieldCache.IntParser parser, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet 
getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { final int inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { int i = lowerVal.intValue(); @@ -276,9 +277,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final int[] values = FieldCache.DEFAULT.getInts(reader, field, (FieldCache.IntParser) parser); + final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -305,7 +306,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newLongRange(String field, FieldCache.LongParser parser, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { final long inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { long i = lowerVal.longValue(); @@ -327,9 +328,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final long[] values = FieldCache.DEFAULT.getLongs(reader, field, (FieldCache.LongParser) parser); + final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -356,7 +357,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newFloatRange(String field, FieldCache.FloatParser parser, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final float inclusiveLowerPoint, inclusiveUpperPoint; @@ -382,9 +383,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final float[] values = FieldCache.DEFAULT.getFloats(reader, field, (FieldCache.FloatParser) parser); + final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 
0.0f)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -411,7 +412,7 @@ public abstract class FieldCacheRangeFilter extends Filter { public static FieldCacheRangeFilter newDoubleRange(String field, FieldCache.DoubleParser parser, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final double inclusiveLowerPoint, inclusiveUpperPoint; @@ -437,9 +438,9 @@ public abstract class FieldCacheRangeFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet.EMPTY_DOCIDSET; - final double[] values = FieldCache.DEFAULT.getDoubles(reader, field, (FieldCache.DoubleParser) parser); + final double[] values = FieldCache.DEFAULT.getDoubles(context.reader, field, (FieldCache.DoubleParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) { + return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java b/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java index 57f8be754a4..7c9099b6d2c 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.index.DocsEnum; // javadoc @link import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.BytesRef; @@ -115,8 +116,8 @@ public class FieldCacheTermsFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return new FieldCacheTermsFilterDocIdSet(getFieldCache().getTermsIndex(reader, field)); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + return new FieldCacheTermsFilterDocIdSet(getFieldCache().getTermsIndex(context.reader, field)); } protected class FieldCacheTermsFilterDocIdSet extends DocIdSet { diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 27d10aecd41..1f237babb3f 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.text.Collator; import java.util.Locale; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTermsIndex; @@ -83,7 +83,7 @@ import 
org.apache.lucene.util.packed.PackedInts; * priority queue. The {@link FieldValueHitQueue} * calls this method when a new hit is competitive. * - *

  • {@link #setNextReader} Invoked + *
  • {@link #setNextReader(IndexReader.AtomicReaderContext)} Invoked * when the search is switching to the next segment. * You may need to update internal state of the * comparator, for example retrieving new values from @@ -151,18 +151,17 @@ public abstract class FieldComparator { public abstract void copy(int slot, int doc) throws IOException; /** - * Set a new Reader. All subsequent docIDs are relative to + * Set a new {@link AtomicReaderContext}. All subsequent docIDs are relative to * the current reader (you must add docBase if you need to * map it to a top-level docID). * - * @param reader current reader - * @param docBase docBase of this reader + * @param context current reader context * @return the comparator to use for this segment; most * comparators can just return "this" to reuse the same * comparator across segments * @throws IOException */ - public abstract FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException; + public abstract FieldComparator setNextReader(AtomicReaderContext context) throws IOException; /** Sets the Scorer to use in case a document's score is * needed. @@ -242,8 +241,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup(FieldCache.DEFAULT.getBytes(reader, creator.field, creator)); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup(FieldCache.DEFAULT.getBytes(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -314,8 +313,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup(FieldCache.DEFAULT.getDoubles(reader, creator.field, creator)); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup(FieldCache.DEFAULT.getDoubles(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -374,8 +373,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.docValues(field).getSource(); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + currentReaderValues = context.reader.docValues(field).getSource(); return this; } @@ -447,8 +446,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup(FieldCache.DEFAULT.getFloats(reader, creator.field, creator)); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup(FieldCache.DEFAULT.getFloats(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -503,8 +502,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup( FieldCache.DEFAULT.getShorts(reader, creator.field, creator)); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup( FieldCache.DEFAULT.getShorts(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -581,8 +580,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup(FieldCache.DEFAULT.getInts(reader, creator.field, creator)); + 
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup(FieldCache.DEFAULT.getInts(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -645,8 +644,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.docValues(field).getSource(); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + currentReaderValues = context.reader.docValues(field).getSource(); return this; } @@ -719,8 +718,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - setup(FieldCache.DEFAULT.getLongs(reader, creator.field, creator)); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + setup(FieldCache.DEFAULT.getLongs(context.reader, creator.field, creator)); docValues = cached.values; return this; } @@ -770,7 +769,7 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) { + public FieldComparator setNextReader(AtomicReaderContext context) { return this; } @@ -822,11 +821,11 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) { + public FieldComparator setNextReader(AtomicReaderContext context) { // TODO: can we "map" our docIDs to the current // reader? saves having to then subtract on every // compare call - this.docBase = docBase; + this.docBase = context.docBase; return this; } @@ -903,8 +902,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field); return this; } @@ -998,8 +997,8 @@ public abstract class FieldComparator { abstract class PerSegmentComparator extends FieldComparator { @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - return TermOrdValComparator.this.setNextReader(reader, docBase); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + return TermOrdValComparator.this.setNextReader(context); } @Override @@ -1264,8 +1263,9 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + final int docBase = context.docBase; + termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader, field); final PackedInts.Reader docToOrd = termsIndex.getDocToOrd(); FieldComparator perSegComp; if (docToOrd instanceof Direct8) { @@ -1379,8 +1379,8 @@ public abstract class FieldComparator { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - docTerms = FieldCache.DEFAULT.getTerms(reader, field); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + docTerms = FieldCache.DEFAULT.getTerms(context.reader, field); return this; } diff --git 
a/lucene/src/java/org/apache/lucene/search/FieldDoc.java b/lucene/src/java/org/apache/lucene/search/FieldDoc.java index faf54a079ea..e6a5b1a34b1 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldDoc.java +++ b/lucene/src/java/org/apache/lucene/search/FieldDoc.java @@ -43,7 +43,7 @@ public class FieldDoc extends ScoreDoc { * Sort object. Each Object will be either an Integer, Float or String, * depending on the type of values in the terms of the original field. * @see Sort - * @see Searcher#search(Query,Filter,int,Sort) + * @see IndexSearcher#search(Query,Filter,int,Sort) */ public Comparable[] fields; diff --git a/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java index 13ecb8672de..394d13539ed 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java +++ b/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.PriorityQueue; * * @lucene.experimental * @since 2.9 - * @see Searcher#search(Query,Filter,int,Sort) + * @see IndexSearcher#search(Query,Filter,int,Sort) * @see FieldCache */ public abstract class FieldValueHitQueue extends PriorityQueue { @@ -57,9 +57,6 @@ public abstract class FieldValueHitQueue extends PriorityQueueNOTE: null can be returned if @@ -49,5 +53,5 @@ public abstract class Filter implements java.io.Serializable { * * @see DocIdBitSet */ - public abstract DocIdSet getDocIdSet(IndexReader reader) throws IOException; + public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/search/FilterManager.java b/lucene/src/java/org/apache/lucene/search/FilterManager.java deleted file mode 100644 index 608f243890b..00000000000 --- a/lucene/src/java/org/apache/lucene/search/FilterManager.java +++ /dev/null @@ -1,203 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeSet; - -import org.apache.lucene.util.ThreadInterruptedException; - -/** - * Filter caching singleton. It can be used - * to save filters locally for reuse. - * This class makes it possible to cache Filters even when using RMI, as it - * keeps the cache on the searcher side of the RMI connection. - * - * Also could be used as a persistent storage for any filter as long as the - * filter provides a proper hashCode(), as that is used as the key in the cache. - * - * The cache is periodically cleaned up from a separate thread to ensure the - * cache doesn't exceed the maximum size. 
- */ -public class FilterManager { - - protected static FilterManager manager; - - /** The default maximum number of Filters in the cache */ - protected static final int DEFAULT_CACHE_CLEAN_SIZE = 100; - /** The default frequency of cache cleanup */ - protected static final long DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; - - /** The cache itself */ - protected Map cache; - /** Maximum allowed cache size */ - protected int cacheCleanSize; - /** Cache cleaning frequency */ - protected long cleanSleepTime; - /** Cache cleaner that runs in a separate thread */ - protected FilterCleaner filterCleaner; - - public synchronized static FilterManager getInstance() { - if (manager == null) { - manager = new FilterManager(); - } - return manager; - } - - /** - * Sets up the FilterManager singleton. - */ - protected FilterManager() { - cache = new HashMap(); - cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items - cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings - - filterCleaner = new FilterCleaner(); - Thread fcThread = new Thread(filterCleaner); - // set to be a Daemon so it doesn't have to be stopped - fcThread.setDaemon(true); - fcThread.start(); - } - - /** - * Sets the max size that cache should reach before it is cleaned up - * @param cacheCleanSize maximum allowed cache size - */ - public void setCacheSize(int cacheCleanSize) { - this.cacheCleanSize = cacheCleanSize; - } - - /** - * Sets the cache cleaning frequency in milliseconds. - * @param cleanSleepTime cleaning frequency in milliseconds - */ - public void setCleanThreadSleepTime(long cleanSleepTime) { - this.cleanSleepTime = cleanSleepTime; - } - - /** - * Returns the cached version of the filter. Allows the caller to pass up - * a small filter but this will keep a persistent version around and allow - * the caching filter to do its job. - * - * @param filter The input filter - * @return The cached version of the filter - */ - public Filter getFilter(Filter filter) { - synchronized(cache) { - FilterItem fi = null; - fi = cache.get(Integer.valueOf(filter.hashCode())); - if (fi != null) { - fi.timestamp = new Date().getTime(); - return fi.filter; - } - cache.put(Integer.valueOf(filter.hashCode()), new FilterItem(filter)); - return filter; - } - } - - /** - * Holds the filter and the last time the filter was used, to make LRU-based - * cache cleaning possible. - * TODO: Clean this up when we switch to Java 1.5 - */ - protected class FilterItem { - public Filter filter; - public long timestamp; - - public FilterItem (Filter filter) { - this.filter = filter; - this.timestamp = new Date().getTime(); - } - } - - - /** - * Keeps the cache from getting too big. - * If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread - * to clean out the cache. - * - * The SortedSet sortedFilterItems is used only to sort the items from the cache, - * so when it's time to clean up we have the TreeSet sort the FilterItems by - * timestamp. - * - * Removes 1.5 * the numbers of items to make the cache smaller. - * For example: - * If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 round up to 8. - * This way we clean the cache a bit more, and avoid having the cache cleaner having to do it frequently. 
- */ - protected class FilterCleaner implements Runnable { - - private boolean running = true; - private TreeSet> sortedFilterItems; - - public FilterCleaner() { - sortedFilterItems = new TreeSet>(new Comparator>() { - public int compare(Map.Entry a, Map.Entry b) { - FilterItem fia = a.getValue(); - FilterItem fib = b.getValue(); - if ( fia.timestamp == fib.timestamp ) { - return 0; - } - // smaller timestamp first - if ( fia.timestamp < fib.timestamp ) { - return -1; - } - // larger timestamp last - return 1; - - } - }); - } - - public void run () { - while (running) { - - // sort items from oldest to newest - // we delete the oldest filters - if (cache.size() > cacheCleanSize) { - // empty the temporary set - sortedFilterItems.clear(); - synchronized (cache) { - sortedFilterItems.addAll(cache.entrySet()); - Iterator> it = sortedFilterItems.iterator(); - int numToDelete = (int) ((cache.size() - cacheCleanSize) * 1.5); - int counter = 0; - // loop over the set and delete all of the cache entries not used in a while - while (it.hasNext() && counter++ < numToDelete) { - Map.Entry entry = it.next(); - cache.remove(entry.getKey()); - } - } - // empty the set so we don't tie up the memory - sortedFilterItems.clear(); - } - // take a nap - try { - Thread.sleep(cleanSleepTime); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - } -} diff --git a/lucene/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/src/java/org/apache/lucene/search/FilteredQuery.java index 6f27cfc6773..1bcd8459d84 100644 --- a/lucene/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/lucene/src/java/org/apache/lucene/search/FilteredQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; @@ -61,7 +62,6 @@ extends Query { @Override public Weight createWeight(final IndexSearcher searcher) throws IOException { final Weight weight = query.createWeight (searcher); - final Similarity similarity = query.getSimilarity(searcher); return new Weight() { private float value; @@ -81,7 +81,7 @@ extends Query { } @Override - public Explanation explain (IndexReader ir, int i) throws IOException { + public Explanation explain (AtomicReaderContext ir, int i) throws IOException { Explanation inner = weight.explain (ir, i); if (getBoost()!=1) { Explanation preBoost = inner; @@ -111,13 +111,13 @@ extends Query { // return a filtering scorer @Override - public Scorer scorer(IndexReader indexReader, boolean scoreDocsInOrder, boolean topScorer) + public Scorer scorer(AtomicReaderContext context, ScorerContext scoreContext) throws IOException { - final Scorer scorer = weight.scorer(indexReader, true, false); + final Scorer scorer = weight.scorer(context, ScorerContext.def()); if (scorer == null) { return null; } - DocIdSet docIdSet = filter.getDocIdSet(indexReader); + DocIdSet docIdSet = filter.getDocIdSet(context); if (docIdSet == null) { return null; } @@ -126,7 +126,7 @@ extends Query { return null; } - return new Scorer(similarity, this) { + return new Scorer(this) { private int doc = -1; diff --git a/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java b/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java index 84437b755f0..0cfc80c9f57 100644 --- a/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java +++ 
b/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -121,10 +122,15 @@ public abstract class FilteredTermsEnum extends TermsEnum { } @Override - public int docFreq() { + public int docFreq() throws IOException { return tenum.docFreq(); } + @Override + public long totalTermFreq() throws IOException { + return tenum.totalTermFreq(); + } + /** This enum does not support seeking! * @throws UnsupportedOperationException */ @@ -155,12 +161,24 @@ public abstract class FilteredTermsEnum extends TermsEnum { public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { return tenum.docsAndPositions(bits, reuse); } - + + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override - public void cacheCurrentTerm() throws IOException { - tenum.cacheCurrentTerm(); + public void seek(BytesRef term, TermState state) throws IOException { + throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } - + + /** + * Returns the filtered enums term state + */ + @Override + public TermState termState() throws IOException { + assert tenum != null; + return tenum.termState(); + } + @SuppressWarnings("fallthrough") @Override public BytesRef next() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 48a7709bed0..b9694d10aa2 100644 --- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -20,7 +20,9 @@ package org.apache.lucene.search; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; @@ -139,18 +141,18 @@ public final class FuzzyTermsEnum extends TermsEnum { */ private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException { - final List runAutomata = initAutomata(editDistance); + final List runAutomata = initAutomata(editDistance); if (editDistance < runAutomata.size()) { return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1) - .toArray(new ByteRunAutomaton[editDistance + 1]), lastTerm); + .toArray(new CompiledAutomaton[editDistance + 1]), lastTerm); } else { return null; } } /** initialize levenshtein DFAs up to maxDistance, if possible */ - private List initAutomata(int maxDistance) { - final List runAutomata = dfaAtt.automata(); + private List initAutomata(int maxDistance) { + final List runAutomata = dfaAtt.automata(); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = @@ -164,7 +166,7 @@ public final class FuzzyTermsEnum extends TermsEnum { UnicodeUtil.newString(termText, 0, realPrefixLength)); a = BasicOperations.concatenate(prefix, a); } - runAutomata.add(new ByteRunAutomaton(a)); + runAutomata.add(new 
CompiledAutomaton(a, true)); } } return runAutomata; @@ -240,15 +242,15 @@ public final class FuzzyTermsEnum extends TermsEnum { // proxy all other enum calls to the actual enum @Override - public int docFreq() { + public int docFreq() throws IOException { return actualEnum.docFreq(); } - - @Override - public void cacheCurrentTerm() throws IOException { - actualEnum.cacheCurrentTerm(); - } + @Override + public long totalTermFreq() throws IOException { + return actualEnum.totalTermFreq(); + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return actualEnum.docs(skipDocs, reuse); @@ -260,6 +262,16 @@ public final class FuzzyTermsEnum extends TermsEnum { return actualEnum.docsAndPositions(skipDocs, reuse); } + @Override + public void seek(BytesRef term, TermState state) throws IOException { + actualEnum.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return actualEnum.termState(); + } + @Override public Comparator getComparator() throws IOException { return actualEnum.getComparator(); @@ -301,10 +313,12 @@ public final class FuzzyTermsEnum extends TermsEnum { private final BoostAttribute boostAtt = attributes().addAttribute(BoostAttribute.class); - public AutomatonFuzzyTermsEnum(ByteRunAutomaton matchers[], + public AutomatonFuzzyTermsEnum(CompiledAutomaton compiled[], BytesRef lastTerm) throws IOException { - super(matchers[matchers.length - 1], tenum, true, null); - this.matchers = matchers; + super(tenum, compiled[compiled.length - 1]); + this.matchers = new ByteRunAutomaton[compiled.length]; + for (int i = 0; i < compiled.length; i++) + this.matchers[i] = compiled[i].runAutomaton; this.lastTerm = lastTerm; termRef = new BytesRef(term.text()); } @@ -552,14 +566,14 @@ public final class FuzzyTermsEnum extends TermsEnum { /** @lucene.internal */ public static interface LevenshteinAutomataAttribute extends Attribute { - public List automata(); + public List automata(); } /** @lucene.internal */ public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute { - private final List automata = new ArrayList(); + private final List automata = new ArrayList(); - public List automata() { + public List automata() { return automata; } @@ -584,7 +598,7 @@ public final class FuzzyTermsEnum extends TermsEnum { @Override public void copyTo(AttributeImpl target) { - final List targetAutomata = + final List targetAutomata = ((LevenshteinAutomataAttribute) target).automata(); targetAutomata.clear(); targetAutomata.addAll(automata); diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java index cc0dc763c08..e3e0a1b6602 100644 --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java @@ -18,9 +18,7 @@ package org.apache.lucene.search; */ import java.io.IOException; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; import java.util.NoSuchElementException; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; @@ -35,8 +33,12 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import 
org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; // javadoc import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ThreadInterruptedException; @@ -56,18 +58,35 @@ import org.apache.lucene.util.ThreadInterruptedException; * use your own (non-Lucene) objects instead.

    */ public class IndexSearcher { - IndexReader reader; + final IndexReader reader; // package private for testing! private boolean closeReader; // NOTE: these members might change in incompatible ways // in the next release - protected final IndexReader[] subReaders; - protected final IndexSearcher[] subSearchers; - protected final int[] docStarts; + protected final ReaderContext readerContext; + protected final AtomicReaderContext[] leafContexts; + // used with executor - each slice holds a set of leafs executed within one thread + protected final LeafSlice[] leafSlices; + + // These are only used for multi-threaded search private final ExecutorService executor; - /** The Similarity implementation used by this searcher. */ - private Similarity similarity = Similarity.getDefault(); + // the default SimilarityProvider + private static final SimilarityProvider defaultProvider = new DefaultSimilarity(); + + /** + * Expert: returns a default SimilarityProvider instance. + * In general, this method is only called to initialize searchers and writers. + * User code and query implementations should respect + * {@link IndexSearcher#getSimilarityProvider()}. + * @lucene.internal + */ + public static SimilarityProvider getDefaultSimilarityProvider() { + return defaultProvider; + } + + /** The SimilarityProvider implementation used by this searcher. */ + private SimilarityProvider similarityProvider = defaultProvider; /** Creates a searcher searching the index in the named * directory, with readOnly=true @@ -114,83 +133,71 @@ public class IndexSearcher { this(r, false, executor); } - /** Expert: directly specify the reader, subReaders and - * their docID starts. + /** + * Creates a searcher searching the provided top-level {@link ReaderContext}. + *

    + * Given a non-null {@link ExecutorService} this method runs + * searches for each segment separately, using the provided ExecutorService. + * IndexSearcher will not shutdown/awaitTermination this ExecutorService on + * close; you must do so, eventually, on your own. NOTE: if you are using + * {@link NIOFSDirectory}, do not use the shutdownNow method of + * ExecutorService as this uses Thread.interrupt under-the-hood which can + * silently close file descriptors (see LUCENE-2239). * - * @lucene.experimental */ - public IndexSearcher(IndexReader reader, IndexReader[] subReaders, int[] docStarts) { - this.reader = reader; - this.subReaders = subReaders; - this.docStarts = docStarts; - subSearchers = new IndexSearcher[subReaders.length]; - for(int i=0;iLUCENE-2239). - * - * @lucene.experimental */ - public IndexSearcher(IndexReader reader, IndexReader[] subReaders, int[] docStarts, ExecutorService executor) { - this.reader = reader; - this.subReaders = subReaders; - this.docStarts = docStarts; - subSearchers = new IndexSearcher[subReaders.length]; - for(int i=0;i subReadersList = new ArrayList(); - gatherSubReaders(subReadersList, reader); - subReaders = subReadersList.toArray(new IndexReader[subReadersList.size()]); - docStarts = new int[subReaders.length]; - subSearchers = new IndexSearcher[subReaders.length]; - int maxDoc = 0; - for (int i = 0; i < subReaders.length; i++) { - docStarts[i] = maxDoc; - maxDoc += subReaders[i].maxDoc(); - if (subReaders[i] == r) { - subSearchers[i] = this; - } else { - subSearchers[i] = new IndexSearcher(subReaders[i]); - } + this.readerContext = context; + leafContexts = ReaderUtil.leaves(context); + this.leafSlices = executor == null ? null : slices(leafContexts); + } + + /** + * Expert: Creates an array of leaf slices each holding a subset of the given leaves. + * Each {@link LeafSlice} is executed in a single thread. By default there + * will be one {@link LeafSlice} per leaf ({@link AtomicReaderContext}). + */ + protected LeafSlice[] slices(AtomicReaderContext...leaves) { + LeafSlice[] slices = new LeafSlice[leaves.length]; + for (int i = 0; i < slices.length; i++) { + slices[i] = new LeafSlice(leaves[i]); } + return slices; } - protected void gatherSubReaders(List allSubReaders, IndexReader r) { - ReaderUtil.gatherSubReaders(allSubReaders, r); - } - + /** Return the {@link IndexReader} this searches. */ public IndexReader getIndexReader() { return reader; } - /** Returns the atomic subReaders used by this searcher. */ - public IndexReader[] getSubReaders() { - return subReaders; - } - /** Expert: Returns one greater than the largest possible document number. * * @see org.apache.lucene.index.IndexReader#maxDoc() @@ -205,11 +212,11 @@ public class IndexSearcher { return reader.docFreq(term); } else { final ExecutionHelper runner = new ExecutionHelper(executor); - for(int i = 0; i < subReaders.length; i++) { - final IndexSearcher searchable = subSearchers[i]; + for(int i = 0; i < leafContexts.length; i++) { + final IndexReader leaf = leafContexts[i].reader; runner.submit(new Callable() { public Integer call() throws IOException { - return Integer.valueOf(searchable.docFreq(term)); + return Integer.valueOf(leaf.docFreq(term)); } }); } @@ -231,16 +238,15 @@ public class IndexSearcher { return reader.document(docID, fieldSelector); } - /** Expert: Set the Similarity implementation used by this Searcher. + /** Expert: Set the SimilarityProvider implementation used by this Searcher. 
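// Illustrative usage sketch, not part of the patch: driving the reworked IndexSearcher with
// an ExecutorService so that each LeafSlice (by default one per segment) is searched on its
// own thread. "dir" and the class/method names here are hypothetical.
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

class ParallelSearchExample {
  static TopDocs searchInParallel(Directory dir, Query query) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    IndexReader reader = IndexReader.open(dir, true);    // read-only reader
    IndexSearcher searcher = new IndexSearcher(reader, pool);
    try {
      return searcher.search(query, 10);                 // leaf slices are submitted to the pool
    } finally {
      searcher.close();
      reader.close();
      pool.shutdown();   // the searcher never shuts the executor down itself
    }
  }
}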
* - * @see Similarity#setDefault(Similarity) */ - public void setSimilarity(Similarity similarity) { - this.similarity = similarity; + public void setSimilarityProvider(SimilarityProvider similarityProvider) { + this.similarityProvider = similarityProvider; } - public Similarity getSimilarity() { - return similarity; + public SimilarityProvider getSimilarityProvider() { + return similarityProvider; } /** @@ -284,7 +290,7 @@ public class IndexSearcher { * *

    Applications should only use this if they need all of the * matching documents. The high-level search API ({@link - * Searcher#search(Query, Filter, int)}) is usually more efficient, as it skips + * IndexSearcher#search(Query, Filter, int)}) is usually more efficient, as it skips * non-high-scoring hits. * * @param query to match documents @@ -294,7 +300,7 @@ public class IndexSearcher { */ public void search(Query query, Filter filter, Collector results) throws IOException { - search(createWeight(query), filter, results); + search(leafContexts, createWeight(query), filter, results); } /** Lower-level search API. @@ -303,7 +309,7 @@ public class IndexSearcher { * *

    Applications should only use this if they need all of the * matching documents. The high-level search API ({@link - * Searcher#search(Query, int)}) is usually more efficient, as it skips + * IndexSearcher#search(Query, int)}) is usually more efficient, as it skips * non-high-scoring hits. *

    Note: The score passed to this method is a raw score. * In other words, the score will not necessarily be a float whose value is @@ -312,7 +318,7 @@ public class IndexSearcher { */ public void search(Query query, Collector results) throws IOException { - search(createWeight(query), null, results); + search(leafContexts, createWeight(query), null, results); } /** Search implementation with arbitrary sorting. Finds @@ -347,37 +353,30 @@ public class IndexSearcher { /** Expert: Low-level search implementation. Finds the top n * hits for query, applying filter if non-null. * - *

    Applications should usually call {@link Searcher#search(Query,int)} or - * {@link Searcher#search(Query,Filter,int)} instead. + *

    Applications should usually call {@link IndexSearcher#search(Query,int)} or + * {@link IndexSearcher#search(Query,Filter,int)} instead. * @throws BooleanQuery.TooManyClauses */ protected TopDocs search(Weight weight, Filter filter, int nDocs) throws IOException { - if (executor == null) { - // single thread - int limit = reader.maxDoc(); - if (limit == 0) { - limit = 1; - } - nDocs = Math.min(nDocs, limit); - TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder()); - search(weight, filter, collector); - return collector.topDocs(); + return search(leafContexts, weight, filter, nDocs); } else { final HitQueue hq = new HitQueue(nDocs, false); final Lock lock = new ReentrantLock(); final ExecutionHelper runner = new ExecutionHelper(executor); - for (int i = 0; i < subReaders.length; i++) { // search each sub + for (int i = 0; i < leafSlices.length; i++) { // search each sub runner.submit( - new MultiSearcherCallableNoSort(lock, subSearchers[i], weight, filter, nDocs, hq, i, docStarts)); + new SearcherCallableNoSort(lock, this, leafSlices[i], weight, filter, nDocs, hq)); } int totalHits = 0; float maxScore = Float.NEGATIVE_INFINITY; for (final TopDocs topDocs : runner) { - totalHits += topDocs.totalHits; - maxScore = Math.max(maxScore, topDocs.getMaxScore()); + if(topDocs.totalHits != 0) { + totalHits += topDocs.totalHits; + maxScore = Math.max(maxScore, topDocs.getMaxScore()); + } } final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; @@ -388,13 +387,32 @@ public class IndexSearcher { } } + /** Expert: Low-level search implementation. Finds the top n + * hits for query, using the given leaf readers applying filter if non-null. + * + *

    Applications should usually call {@link IndexSearcher#search(Query,int)} or + * {@link IndexSearcher#search(Query,Filter,int)} instead. + * @throws BooleanQuery.TooManyClauses + */ + protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs) throws IOException { + // single thread + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + nDocs = Math.min(nDocs, limit); + TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder()); + search(leaves, weight, filter, collector); + return collector.topDocs(); + } + /** Expert: Low-level search implementation with arbitrary sorting. Finds * the top n hits for query, applying * filter if non-null, and sorting the hits by the criteria in * sort. * *

    Applications should usually call {@link - * Searcher#search(Query,Filter,int,Sort)} instead. + * IndexSearcher#search(Query,Filter,int,Sort)} instead. * * @throws BooleanQuery.TooManyClauses */ @@ -419,33 +437,26 @@ public class IndexSearcher { throws IOException { if (sort == null) throw new NullPointerException(); - + if (executor == null) { - // single thread - int limit = reader.maxDoc(); - if (limit == 0) { - limit = 1; - } - nDocs = Math.min(nDocs, limit); - - TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, - fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.scoresDocsOutOfOrder()); - search(weight, filter, collector); - return (TopFieldDocs) collector.topDocs(); + // use all leaves here! + return search (leafContexts, weight, filter, nDocs, sort, fillFields); } else { // TODO: make this respect fillFields final FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(nDocs); final Lock lock = new ReentrantLock(); final ExecutionHelper runner = new ExecutionHelper(executor); - for (int i = 0; i < subReaders.length; i++) { // search each sub + for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice runner.submit( - new MultiSearcherCallableWithSort(lock, subSearchers[i], weight, filter, nDocs, hq, sort, i, docStarts)); + new SearcherCallableWithSort(lock, this, leafSlices[i], weight, filter, nDocs, hq, sort)); } int totalHits = 0; float maxScore = Float.NEGATIVE_INFINITY; for (final TopFieldDocs topFieldDocs : runner) { - totalHits += topFieldDocs.totalHits; - maxScore = Math.max(maxScore, topFieldDocs.getMaxScore()); + if (topFieldDocs.totalHits != 0) { + totalHits += topFieldDocs.totalHits; + maxScore = Math.max(maxScore, topFieldDocs.getMaxScore()); + } } final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; for (int i = hq.size() - 1; i >= 0; i--) // put docs in array @@ -454,6 +465,33 @@ public class IndexSearcher { return new TopFieldDocs(totalHits, scoreDocs, hq.getFields(), maxScore); } } + + + /** + * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose + * whether or not the fields in the returned {@link FieldDoc} instances should + * be set by specifying fillFields. + * + *

    NOTE: this does not compute scores by default. If you + * need scores, create a {@link TopFieldCollector} + * instance by calling {@link TopFieldCollector#create} and + * then pass that to {@link #search(Weight, Filter, + * Collector)}.

    + */ + protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs, + Sort sort, boolean fillFields) throws IOException { + // single thread + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + nDocs = Math.min(nDocs, limit); + + TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, + fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.scoresDocsOutOfOrder()); + search(leaves, weight, filter, collector); + return (TopFieldDocs) collector.topDocs(); + } /** * Lower-level search API. @@ -464,9 +502,15 @@ public class IndexSearcher { * *

    * Applications should only use this if they need all of the matching - * documents. The high-level search API ({@link Searcher#search(Query,int)}) is + * documents. The high-level search API ({@link IndexSearcher#search(Query,int)}) is * usually more efficient, as it skips non-high-scoring hits. * + *

    + * NOTE: this method executes the searches on all given leaves exclusively. + * To search across all the searchers leaves use {@link #leafContexts}. + * + * @param leaves + * the searchers leaves to execute the searches on * @param weight * to match documents * @param filter @@ -475,35 +519,36 @@ public class IndexSearcher { * to receive hits * @throws BooleanQuery.TooManyClauses */ - protected void search(Weight weight, Filter filter, Collector collector) + protected void search(AtomicReaderContext[] leaves, Weight weight, Filter filter, Collector collector) throws IOException { // TODO: should we make this // threaded...? the Collector could be sync'd? - + ScorerContext scorerContext = ScorerContext.def().scoreDocsInOrder(true).topScorer(true); // always use single thread: if (filter == null) { - for (int i = 0; i < subReaders.length; i++) { // search each subreader - collector.setNextReader(subReaders[i], docStarts[i]); - Scorer scorer = weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true); + for (int i = 0; i < leaves.length; i++) { // search each subreader + collector.setNextReader(leaves[i]); + scorerContext = scorerContext.scoreDocsInOrder(!collector.acceptsDocsOutOfOrder()); + Scorer scorer = weight.scorer(leaves[i], scorerContext); if (scorer != null) { scorer.score(collector); } } } else { - for (int i = 0; i < subReaders.length; i++) { // search each subreader - collector.setNextReader(subReaders[i], docStarts[i]); - searchWithFilter(subReaders[i], weight, filter, collector); + for (int i = 0; i < leaves.length; i++) { // search each subreader + collector.setNextReader(leaves[i]); + searchWithFilter(leaves[i], weight, filter, collector); } } } - private void searchWithFilter(IndexReader reader, Weight weight, + private void searchWithFilter(AtomicReaderContext context, Weight weight, final Filter filter, final Collector collector) throws IOException { assert filter != null; - Scorer scorer = weight.scorer(reader, true, false); + Scorer scorer = weight.scorer(context, ScorerContext.def()); if (scorer == null) { return; } @@ -512,7 +557,7 @@ public class IndexSearcher { assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; // CHECKME: use ConjunctionScorer here? - DocIdSet filterDocIdSet = filter.getDocIdSet(reader); + DocIdSet filterDocIdSet = filter.getDocIdSet(context); if (filterDocIdSet == null) { // this means the filter does not accept any documents. return; @@ -576,14 +621,14 @@ public class IndexSearcher { * and, for good performance, should not be displayed with every hit. * Computing an explanation is as expensive as executing the query over the * entire index. - *

    Applications should call {@link Searcher#explain(Query, int)}. + *

    Applications should call {@link IndexSearcher#explain(Query, int)}. * @throws BooleanQuery.TooManyClauses */ protected Explanation explain(Weight weight, int doc) throws IOException { - int n = ReaderUtil.subIndex(doc, docStarts); - int deBasedDoc = doc - docStarts[n]; + int n = ReaderUtil.subIndex(doc, leafContexts); + int deBasedDoc = doc - leafContexts[n].docBase; - return weight.explain(subReaders[n], deBasedDoc); + return weight.explain(leafContexts[n], deBasedDoc); } private boolean fieldSortDoTrackScores; @@ -614,39 +659,44 @@ public class IndexSearcher { return query.weight(this); } + /** + * Returns this searchers the top-level {@link ReaderContext}. + * @see IndexReader#getTopReaderContext() + */ + /* sugar for #getReader().getTopReaderContext() */ + public ReaderContext getTopReaderContext() { + return readerContext; + } /** * A thread subclass for searching a single searchable */ - private static final class MultiSearcherCallableNoSort implements Callable { + private static final class SearcherCallableNoSort implements Callable { private final Lock lock; - private final IndexSearcher searchable; + private final IndexSearcher searcher; private final Weight weight; private final Filter filter; private final int nDocs; - private final int i; private final HitQueue hq; - private final int[] starts; + private final LeafSlice slice; - public MultiSearcherCallableNoSort(Lock lock, IndexSearcher searchable, Weight weight, - Filter filter, int nDocs, HitQueue hq, int i, int[] starts) { + public SearcherCallableNoSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, + Filter filter, int nDocs, HitQueue hq) { this.lock = lock; - this.searchable = searchable; + this.searcher = searcher; this.weight = weight; this.filter = filter; this.nDocs = nDocs; this.hq = hq; - this.i = i; - this.starts = starts; + this.slice = slice; } public TopDocs call() throws IOException { - final TopDocs docs = searchable.search (weight, filter, nDocs); + final TopDocs docs = searcher.search (slice.leaves, weight, filter, nDocs); final ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq final ScoreDoc scoreDoc = scoreDocs[j]; - scoreDoc.doc += starts[i]; // convert doc //it would be so nice if we had a thread-safe insert lock.lock(); try { @@ -664,47 +714,31 @@ public class IndexSearcher { /** * A thread subclass for searching a single searchable */ - private static final class MultiSearcherCallableWithSort implements Callable { + private static final class SearcherCallableWithSort implements Callable { private final Lock lock; - private final IndexSearcher searchable; + private final IndexSearcher searcher; private final Weight weight; private final Filter filter; private final int nDocs; - private final int i; private final FieldDocSortedHitQueue hq; - private final int[] starts; private final Sort sort; + private final LeafSlice slice; - public MultiSearcherCallableWithSort(Lock lock, IndexSearcher searchable, Weight weight, - Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int i, int[] starts) { + public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, + Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort) { this.lock = lock; - this.searchable = searchable; + this.searcher = searcher; this.weight = weight; this.filter = filter; this.nDocs = nDocs; this.hq = hq; - this.i = i; - this.starts = starts; this.sort = sort; + this.slice = slice; } public TopFieldDocs 
call() throws IOException { - final TopFieldDocs docs = searchable.search (weight, filter, nDocs, sort); - // If one of the Sort fields is FIELD_DOC, need to fix its values, so that - // it will break ties by doc Id properly. Otherwise, it will compare to - // 'relative' doc Ids, that belong to two different searchables. - for (int j = 0; j < docs.fields.length; j++) { - if (docs.fields[j].getType() == SortField.DOC) { - // iterate over the score docs and change their fields value - for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) { - FieldDoc fd = (FieldDoc) docs.scoreDocs[j2]; - fd.fields[j] = Integer.valueOf(((Integer) fd.fields[j]).intValue() + starts[i]); - } - break; - } - } - + final TopFieldDocs docs = searcher.search (slice.leaves, weight, filter, nDocs, sort, true); lock.lock(); try { hq.setFields(docs.fields); @@ -715,7 +749,6 @@ public class IndexSearcher { final ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq final FieldDoc fieldDoc = (FieldDoc) scoreDocs[j]; - fieldDoc.doc += starts[i]; // convert doc //it would be so nice if we had a thread-safe insert lock.lock(); try { @@ -772,8 +805,22 @@ public class IndexSearcher { } public Iterator iterator() { - // use the shortcut here - this is only used in a privat context + // use the shortcut here - this is only used in a private context return this; } } + + /** + * A class holding a subset of the {@link IndexSearcher}s leaf contexts to be + * executed within a single thread. + * + * @lucene.experimental + */ + public static class LeafSlice { + final AtomicReaderContext[] leaves; + + public LeafSlice(AtomicReaderContext...leaves) { + this.leaves = leaves; + } + } } diff --git a/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index eb4fcc1ae0d..40cdc1877cd 100644 --- a/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -50,10 +51,12 @@ public class MatchAllDocsQuery extends Query { private int doc = -1; private final int maxDoc; private final Bits delDocs; + private final Similarity similarity; MatchAllScorer(IndexReader reader, Similarity similarity, Weight w, byte[] norms) throws IOException { - super(similarity,w); + super(w); + this.similarity = similarity; delDocs = reader.getDeletedDocs(); score = w.getValue(); maxDoc = reader.maxDoc(); @@ -79,7 +82,7 @@ public class MatchAllDocsQuery extends Query { @Override public float score() { - return norms == null ? score : score * getSimilarity().decodeNormValue(norms[docID()]); + return norms == null ? score : score * similarity.decodeNormValue(norms[docID()]); } @Override @@ -95,7 +98,7 @@ public class MatchAllDocsQuery extends Query { private float queryNorm; public MatchAllDocsWeight(IndexSearcher searcher) { - this.similarity = searcher.getSimilarity(); + this.similarity = normsField == null ? 
null : searcher.getSimilarityProvider().get(normsField); } @Override @@ -126,13 +129,13 @@ public class MatchAllDocsQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - return new MatchAllScorer(reader, similarity, this, - normsField != null ? reader.norms(normsField) : null); + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { + return new MatchAllScorer(context.reader, similarity, this, + normsField != null ? context.reader.norms(normsField) : null); } @Override - public Explanation explain(IndexReader reader, int doc) { + public Explanation explain(AtomicReaderContext context, int doc) { // explain query weight Explanation queryExpl = new ComplexExplanation (true, getValue(), "MatchAllDocsQuery, product of:"); diff --git a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java index 56505f0754e..4fd168e6b10 100644 --- a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java +++ b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java @@ -22,13 +22,13 @@ import org.apache.lucene.util.AttributeSource; // javadocs only import org.apache.lucene.util.BytesRef; /** Add this {@link Attribute} to a fresh {@link AttributeSource} before calling - * {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}. + * {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)}. * {@link FuzzyQuery} is using this to control its internal behaviour * to only return competitive terms. *

    Please note: This attribute is intended to be added by the {@link MultiTermQuery.RewriteMethod} * to an empty {@link AttributeSource} that is shared for all segments * during query rewrite. This attribute source is passed to all segment enums - * on {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}. + * on {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)}. * {@link TopTermsRewrite} uses this attribute to * inform all enums about the current boost, that is not competitive. * @lucene.internal diff --git a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java index e4ffe304084..629f600c677 100644 --- a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java @@ -48,25 +48,6 @@ public final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl imp maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; competitiveTerm = null; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof MaxNonCompetitiveBoostAttributeImpl) { - final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other; - return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost) - && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm)); - } - return false; - } - - @Override - public int hashCode() { - int hash = Float.floatToIntBits(maxNonCompetitiveBoost); - if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode(); - return hash; - } @Override public void copyTo(AttributeImpl target) { diff --git a/lucene/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/src/java/org/apache/lucene/search/MultiCollector.java index ee79f549b0b..682413d7a18 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiCollector.java +++ b/lucene/src/java/org/apache/lucene/search/MultiCollector.java @@ -19,14 +19,14 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; /** * A {@link Collector} which allows running a search with several * {@link Collector}s. It offers a static {@link #wrap} method which accepts a - * list of collectots and wraps them with {@link MultiCollector}, while + * list of collectors and wraps them with {@link MultiCollector}, while * filtering out the null null ones. 
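// Illustrative sketch, not part of the patch: with the Collector change shown here, a
// collector switches segments through an AtomicReaderContext and reads the docBase from it
// instead of receiving it as a separate argument. The class name is hypothetical.
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

class GlobalDocIdCollector extends Collector {
  private final List<Integer> hits = new ArrayList<Integer>();
  private int docBase;

  @Override public void setScorer(Scorer scorer) {}                // scores are not needed here
  @Override public void setNextReader(AtomicReaderContext context) {
    docBase = context.docBase;                                     // formerly passed as an int
  }
  @Override public void collect(int doc) {
    hits.add(docBase + doc);                                       // segment-local doc to global docID
  }
  @Override public boolean acceptsDocsOutOfOrder() { return true; }

  public List<Integer> getHits() { return hits; }
}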
*/ public class MultiCollector extends Collector { @@ -108,9 +108,9 @@ public class MultiCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int o) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { for (Collector c : collectors) { - c.setNextReader(reader, o); + c.setNextReader(context); } } diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 2eb23cd7bfd..c5c979cb904 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -21,9 +21,11 @@ import java.io.IOException; import java.util.*; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.Explanation.IDFExplanation; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; @@ -130,21 +132,24 @@ public class MultiPhraseQuery extends Query { private class MultiPhraseWeight extends Weight { private Similarity similarity; private float value; + private final IDFExplanation idfExp; private float idf; private float queryNorm; private float queryWeight; public MultiPhraseWeight(IndexSearcher searcher) throws IOException { - this.similarity = getSimilarity(searcher); + this.similarity = searcher.getSimilarityProvider().get(field); // compute idf - final int maxDoc = searcher.maxDoc(); + ArrayList allTerms = new ArrayList(); for(final Term[] terms: termArrays) { for (Term term: terms) { - idf += this.similarity.idf(searcher.docFreq(term), maxDoc); + allTerms.add(term); } } + idfExp = similarity.idfExplain(allTerms, searcher); + idf = idfExp.getIdf(); } @Override @@ -167,10 +172,10 @@ public class MultiPhraseQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { if (termArrays.size() == 0) // optimize zero-term case return null; - + final IndexReader reader = context.reader; final Bits delDocs = reader.getDeletedDocs(); PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; @@ -219,7 +224,7 @@ public class MultiPhraseQuery extends Query { if (slop == 0) { ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity, - reader.norms(field)); + reader.norms(field)); if (s.noDocs) { return null; } else { @@ -232,12 +237,12 @@ public class MultiPhraseQuery extends Query { } @Override - public Explanation explain(IndexReader reader, int doc) + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); - Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")"); + Explanation idfExpl = new Explanation(idf, "idf(" + field + ":" + idfExp.explain() +")"); // explain query weight Explanation queryExpl = new Explanation(); @@ -263,7 +268,7 @@ public class MultiPhraseQuery extends Query { fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+ "), product of:"); - Scorer scorer = scorer(reader, true, 
false); + Scorer scorer = scorer(context, ScorerContext.def()); if (scorer == null) { return new Explanation(0.0f, "no matching docs"); } @@ -283,7 +288,7 @@ public class MultiPhraseQuery extends Query { fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + byte[] fieldNorms = context.reader.norms(field); float fieldNorm = fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); diff --git a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java index a3a1fe70e24..8051d7fc160 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.PerReaderTermState; /** * An abstract {@link Query} that matches documents @@ -33,7 +34,7 @@ import org.apache.lucene.util.AttributeSource; * FilteredTermsEnum} enumeration. * *

    This query cannot be used directly; you must subclass - * it and define {@link #getTermsEnum(IndexReader,AttributeSource)} to provide a {@link + * it and define {@link #getTermsEnum(Terms,AttributeSource)} to provide a {@link * FilteredTermsEnum} that iterates through the terms to be * matched. * @@ -159,8 +160,8 @@ public abstract class MultiTermQuery extends Query { } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -200,8 +201,8 @@ public abstract class MultiTermQuery extends Query { } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) { - final Query q = new ConstantScoreQuery(new TermQuery(term, docFreq)); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { + final Query q = new ConstantScoreQuery(new TermQuery(term, states)); q.setBoost(boost); topLevel.add(q, BooleanClause.Occur.SHOULD); } diff --git a/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java index 21b271a550b..8a6df063b83 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java @@ -19,8 +19,9 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; @@ -104,7 +105,8 @@ public class MultiTermQueryWrapperFilter extends Filte * results. */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + final IndexReader reader = context.reader; final Fields fields = reader.fields(); if (fields == null) { // reader has no fields @@ -121,7 +123,7 @@ public class MultiTermQueryWrapperFilter extends Filte assert termsEnum != null; if (termsEnum.next() != null) { // fill into a OpenBitSet - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + final OpenBitSet bitSet = new OpenBitSet(context.reader.maxDoc()); int termCount = 0; final Bits delDocs = reader.getDeletedDocs(); DocsEnum docsEnum = null; diff --git a/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java b/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java index 69ba4ace02e..f3a2dc1a66c 100644 --- a/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java +++ b/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java @@ -39,8 +39,6 @@ import org.apache.lucene.util.NumericUtils; // for javadocs * See {@link NumericRangeQuery} for details on how Lucene * indexes and searches numeric valued fields. 
* - * @lucene.experimental - * * @since 2.9 **/ public final class NumericRangeFilter extends MultiTermQueryWrapperFilter> { diff --git a/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java b/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java index 2cba90ad0bd..1daa453383c 100644 --- a/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java +++ b/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java @@ -76,8 +76,6 @@ import org.apache.lucene.index.TermsEnum; * BooleanQuery rewrite methods without changing * BooleanQuery's default max clause count. * - * @lucene.experimental - * *
 * <h3>How it works</h3>
 *
    See the publication about panFMP, diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index c5c287b84a8..8c71ad78bd5 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Set; import java.util.ArrayList; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader; @@ -145,7 +146,7 @@ public class PhraseQuery extends Query { public PhraseWeight(IndexSearcher searcher) throws IOException { - this.similarity = getSimilarity(searcher); + this.similarity = searcher.getSimilarityProvider().get(field); idfExp = similarity.idfExplain(terms, searcher); idf = idfExp.getIdf(); @@ -174,10 +175,10 @@ public class PhraseQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { if (terms.size() == 0) // optimize zero-term case return null; - + final IndexReader reader = context.reader; PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()]; final Bits delDocs = reader.getDeletedDocs(); for (int i = 0; i < terms.size(); i++) { @@ -206,7 +207,7 @@ public class PhraseQuery extends Query { if (slop == 0) { // optimize exact case ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity, - reader.norms(field)); + reader.norms(field)); if (s.noDocs) { return null; } else { @@ -215,12 +216,12 @@ public class PhraseQuery extends Query { } else { return new SloppyPhraseScorer(this, postingsFreqs, similarity, slop, - reader.norms(field)); + reader.norms(field)); } } @Override - public Explanation explain(IndexReader reader, int doc) + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { Explanation result = new Explanation(); @@ -267,7 +268,7 @@ public class PhraseQuery extends Query { fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+ "), product of:"); - Scorer scorer = scorer(reader, true, false); + Scorer scorer = scorer(context, ScorerContext.def()); if (scorer == null) { return new Explanation(0.0f, "no matching docs"); } @@ -287,7 +288,7 @@ public class PhraseQuery extends Query { fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + byte[] fieldNorms = context.reader.norms(field); float fieldNorm = fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); diff --git a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java index 1f9dc6375c6..1fedc2eb3ee 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java @@ -40,9 +40,12 @@ abstract class PhraseScorer extends Scorer { private float freq; //phrase frequency in current doc as computed by phraseFreq(). 
+ protected final Similarity similarity; + PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity similarity, byte[] norms) { - super(similarity, weight); + super(weight); + this.similarity = similarity; this.norms = norms; this.value = weight.getValue(); @@ -105,8 +108,8 @@ abstract class PhraseScorer extends Scorer { @Override public float score() throws IOException { //System.out.println("scoring " + first.doc); - float raw = getSimilarity().tf(freq) * value; // raw score - return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize + float raw = similarity.tf(freq) * value; // raw score + return norms == null ? raw : raw * similarity.decodeNormValue(norms[first.doc]); // normalize } @Override diff --git a/lucene/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java b/lucene/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java index 2dd47bcbfe1..1e7cca99fa8 100644 --- a/lucene/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java +++ b/lucene/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; /** * A {@link Collector} implementation which wraps another @@ -43,8 +43,8 @@ public class PositiveScoresOnlyCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - c.setNextReader(reader, docBase); + public void setNextReader(AtomicReaderContext context) throws IOException { + c.setNextReader(context); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/Query.java b/lucene/src/java/org/apache/lucene/search/Query.java index 8cffc52d2f4..8b937aa5b32 100644 --- a/lucene/src/java/org/apache/lucene/search/Query.java +++ b/lucene/src/java/org/apache/lucene/search/Query.java @@ -98,7 +98,7 @@ public abstract class Query implements java.io.Serializable, Cloneable { Query query = searcher.rewrite(this); Weight weight = query.createWeight(searcher); float sum = weight.sumOfSquaredWeights(); - float norm = getSimilarity(searcher).queryNorm(sum); + float norm = searcher.getSimilarityProvider().queryNorm(sum); if (Float.isInfinite(norm) || Float.isNaN(norm)) norm = 1.0f; weight.normalize(norm); @@ -124,15 +124,6 @@ public abstract class Query implements java.io.Serializable, Cloneable { // needs to be implemented by query subclasses throw new UnsupportedOperationException(); } - - - /** Expert: Returns the Similarity implementation to be used for this query. - * Subclasses may override this method to specify their own Similarity - * implementation, perhaps one that delegates through that of the Searcher. - * By default the Searcher's Similarity implementation is returned.*/ - public Similarity getSimilarity(IndexSearcher searcher) { - return searcher.getSimilarity(); - } /** Returns a clone of this query. 
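// Illustrative sketch, not part of the patch: as the PhraseScorer and Scorer changes above
// show, a Scorer subclass now passes only its Weight to super() and keeps any Similarity it
// needs as its own field. The decorator below is hypothetical; it wraps an iterator with a
// constant score.
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

class ConstantScoreWrappingScorer extends Scorer {
  private final DocIdSetIterator iterator;
  private final float constScore;

  ConstantScoreWrappingScorer(Weight weight, DocIdSetIterator iterator, float constScore) {
    super(weight);                      // was super(similarity, weight) before this patch
    this.iterator = iterator;
    this.constScore = constScore;
  }

  @Override public float score() throws IOException { return constScore; }
  @Override public int docID() { return iterator.docID(); }
  @Override public int nextDoc() throws IOException { return iterator.nextDoc(); }
  @Override public int advance(int target) throws IOException { return iterator.advance(target); }
}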
*/ @Override diff --git a/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java index 3aa6d4d2245..175e36d2d45 100644 --- a/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java +++ b/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java @@ -19,8 +19,8 @@ package org.apache.lucene.search; import java.io.IOException; - -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.Weight.ScorerContext; /** * Constrains search results to only match those which also match a provided @@ -48,12 +48,15 @@ public class QueryWrapperFilter extends Filter { } @Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - final Weight weight = query.weight(new IndexSearcher(reader)); + public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException { + // get a private context that is used to rewrite, createWeight and score eventually + assert context.reader.getTopReaderContext().isAtomic; + final AtomicReaderContext privateContext = (AtomicReaderContext) context.reader.getTopReaderContext(); + final Weight weight = query.weight(new IndexSearcher(privateContext)); return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return weight.scorer(reader, true, false); + return weight.scorer(privateContext, ScorerContext.def()); } @Override public boolean isCacheable() { return false; } diff --git a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java index c365294fe55..cbf86c73ff2 100644 --- a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -36,7 +36,7 @@ class ReqExclScorer extends Scorer { * @param exclDisi indicates exclusion. */ public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) { - super(null); // No similarity used. + super(reqScorer.weight); this.reqScorer = reqScorer; this.exclDisi = exclDisi; } diff --git a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java index cab09c864c8..580de7838d6 100644 --- a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -38,7 +38,7 @@ class ReqOptSumScorer extends Scorer { Scorer reqScorer, Scorer optScorer) { - super(null); // No similarity used. + super(reqScorer.weight); this.reqScorer = reqScorer; this.optScorer = optScorer; } diff --git a/lucene/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java b/lucene/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java index 09a0bcd817d..4aac1b185e4 100644 --- a/lucene/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java @@ -38,19 +38,14 @@ public class ScoreCachingWrappingScorer extends Scorer { /** Creates a new instance by wrapping the given scorer. 
*/ public ScoreCachingWrappingScorer(Scorer scorer) { - super(scorer.getSimilarity()); + super(scorer.weight); this.scorer = scorer; } @Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { return scorer.score(collector, max, firstDocID); } - - @Override - public Similarity getSimilarity() { - return scorer.getSimilarity(); - } @Override public float score() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/search/ScoreDoc.java b/lucene/src/java/org/apache/lucene/search/ScoreDoc.java index 759f71afaf7..f2828d509a8 100644 --- a/lucene/src/java/org/apache/lucene/search/ScoreDoc.java +++ b/lucene/src/java/org/apache/lucene/search/ScoreDoc.java @@ -24,7 +24,7 @@ public class ScoreDoc implements java.io.Serializable { public float score; /** Expert: A hit document's number. - * @see Searcher#doc(int) + * @see IndexSearcher#doc(int) */ public int doc; diff --git a/lucene/src/java/org/apache/lucene/search/Scorer.java b/lucene/src/java/org/apache/lucene/search/Scorer.java index 84e51431e88..8642f957aa7 100644 --- a/lucene/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/src/java/org/apache/lucene/search/Scorer.java @@ -40,31 +40,16 @@ import org.apache.lucene.search.BooleanClause.Occur; * with these scores. */ public abstract class Scorer extends DocIdSetIterator { - private final Similarity similarity; protected final Weight weight; - /** Constructs a Scorer. - * @param similarity The Similarity implementation used by this scorer. - */ - protected Scorer(Similarity similarity) { - this(similarity, null); - } - /** * Constructs a Scorer - * @param similarity The Similarity implementation used by this scorer. - * @param weight The scorers Weight + * @param weight The scorers Weight. */ - protected Scorer(Similarity similarity, Weight weight) { - this.similarity = similarity; + protected Scorer(Weight weight) { this.weight = weight; } - /** Returns the Similarity implementation used by this scorer. */ - public Similarity getSimilarity() { - return this.similarity; - } - /** Scores and collects all matching documents. * @param collector The collector to which all matching documents are passed. */ @@ -90,7 +75,7 @@ public abstract class Scorer extends DocIdSetIterator { * this method. * @return true if more matching documents may remain. */ - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { + public boolean score(Collector collector, int max, int firstDocID) throws IOException { collector.setScorer(this); int doc = firstDocID; while (doc < max) { @@ -172,7 +157,7 @@ public abstract class Scorer extends DocIdSetIterator { *
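Custom scorers now pass only their Weight to the superclass and hold any Similarity they need themselves, as PhraseScorer and ScoreCachingWrappingScorer do above. A minimal sketch under that assumption (class and field names are made up, and only the DocIdSetIterator and score methods required at this revision are overridden):

    // Sketch: a Scorer that delegates iteration and returns a fixed score.
    final class FixedScoreScorer extends Scorer {
      private final DocIdSetIterator disi;
      private final float constScore;

      FixedScoreScorer(Weight weight, DocIdSetIterator disi, float constScore) {
        super(weight);                       // Similarity is no longer passed to Scorer
        this.disi = disi;
        this.constScore = constScore;
      }

      @Override public float score() throws IOException { return constScore; }
      @Override public int docID() { return disi.docID(); }
      @Override public int nextDoc() throws IOException { return disi.nextDoc(); }
      @Override public int advance(int target) throws IOException { return disi.advance(target); }
    }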

    * Note: this method will throw {@link UnsupportedOperationException} if no * associated {@link Weight} instance is provided to - * {@link #Scorer(Similarity, Weight)} + * {@link #Scorer(Weight)} *

    * * @lucene.experimental diff --git a/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java index 355d64e8701..3cf74641b09 100644 --- a/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java +++ b/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; @@ -27,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; @@ -53,8 +55,9 @@ public abstract class ScoringRewrite extends TermCollectingRewr } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, + float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -114,13 +117,13 @@ public abstract class ScoringRewrite extends TermCollectingRewr final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); - final int[] docFreq = col.array.docFreq; final float[] boost = col.array.boost; + final PerReaderTermState[] termStates = col.array.termState; for (int i = 0; i < size; i++) { final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); - assert reader.docFreq(term) == docFreq[pos]; - addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); + assert reader.docFreq(term) == termStates[pos].docFreq(); + addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]); } } query.incTotalNumberOfTerms(size); @@ -143,15 +146,17 @@ public abstract class ScoringRewrite extends TermCollectingRewr @Override public boolean collect(BytesRef bytes) throws IOException { final int e = terms.add(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (e < 0 ) { // duplicate term: update docFreq final int pos = (-e)-1; - array.docFreq[pos] += termsEnum.docFreq(); + array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq()); assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; } else { // new entry: we populate the entry initially - array.docFreq[e] = termsEnum.docFreq(); array.boost[e] = boostAtt.getBoost(); + array.termState[e] = new PerReaderTermState(topReaderContext, state, readerContext.ord, termsEnum.docFreq()); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; @@ -160,8 +165,8 @@ public abstract class ScoringRewrite extends TermCollectingRewr /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ static final class TermFreqBoostByteStart extends DirectBytesStartArray { - int[] docFreq; float[] boost; + PerReaderTermState[] termState; public TermFreqBoostByteStart(int initSize) { super(initSize); @@ 
-171,24 +176,28 @@ public abstract class ScoringRewrite extends TermCollectingRewr public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; - docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; - assert boost.length >= ord.length && docFreq.length >= ord.length; + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] grow() { final int[] ord = super.grow(); - docFreq = ArrayUtil.grow(docFreq, ord.length); boost = ArrayUtil.grow(boost, ord.length); - assert boost.length >= ord.length && docFreq.length >= ord.length; + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] clear() { boost = null; - docFreq = null; + termState = null; return super.clear(); } diff --git a/lucene/src/java/org/apache/lucene/search/Similarity.java b/lucene/src/java/org/apache/lucene/search/Similarity.java index a9916eca955..306f904c270 100644 --- a/lucene/src/java/org/apache/lucene/search/Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/Similarity.java @@ -362,7 +362,7 @@ import org.apache.lucene.util.SmallFloat; * Typically, a document that contains more of the query's terms will receive a higher score * than another document with fewer query terms. * This is a search time factor computed in - * {@link #coord(int, int) coord(q,d)} + * {@link SimilarityProvider#coord(int, int) coord(q,d)} * by the Similarity in effect at search time. *
     
    *
  • @@ -462,12 +462,14 @@ import org.apache.lucene.util.SmallFloat; * {@link org.apache.lucene.document.Fieldable#setBoost(float) field.setBoost()} * before adding the field to a document. * - *
  • {@link #lengthNorm(String, int) lengthNorm(field)} - computed + *
  • lengthNorm - computed * when the document is added to the index in accordance with the number of tokens * of this field in the document, so that shorter fields contribute more to the score. * LengthNorm is computed by the Similarity class in effect at indexing. *
  • * + * The {@link #computeNorm} method is responsible for + * combining all of these factors into a single float. * *

    * When a document is added to the index, all the above factors are multiplied. @@ -480,7 +482,7 @@ import org.apache.lucene.util.SmallFloat; * norm(t,d)   =   * {@link org.apache.lucene.document.Document#getBoost() doc.getBoost()} *  ·  - * {@link #lengthNorm(String, int) lengthNorm(field)} + * lengthNorm *  ·  * * @@ -520,40 +522,13 @@ import org.apache.lucene.util.SmallFloat; * * * - * @see #setDefault(Similarity) - * @see org.apache.lucene.index.IndexWriter#setSimilarity(Similarity) - * @see Searcher#setSimilarity(Similarity) + * @see org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider) + * @see IndexSearcher#setSimilarityProvider(SimilarityProvider) */ public abstract class Similarity implements Serializable { - /** - * The Similarity implementation used by default. - **/ - private static Similarity defaultImpl = new DefaultSimilarity(); public static final int NO_DOC_ID_PROVIDED = -1; - /** Set the default Similarity implementation used by indexing and search - * code. - * - * @see Searcher#setSimilarity(Similarity) - * @see org.apache.lucene.index.IndexWriter#setSimilarity(Similarity) - */ - public static void setDefault(Similarity similarity) { - Similarity.defaultImpl = similarity; - } - - /** Return the default Similarity implementation used by indexing and search - * code. - * - *

    This is initially an instance of {@link DefaultSimilarity}. - * - * @see Searcher#setSimilarity(Similarity) - * @see org.apache.lucene.index.IndexWriter#setSimilarity(Similarity) - */ - public static Similarity getDefault() { - return Similarity.defaultImpl; - } - /** Cache of decoded bytes. */ private static final float[] NORM_TABLE = new float[256]; @@ -570,12 +545,23 @@ public abstract class Similarity implements Serializable { } /** - * Compute the normalization value for a field, given the accumulated + * Computes the normalization value for a field, given the accumulated * state of term processing for this field (see {@link FieldInvertState}). * *

    Implementations should calculate a float value based on the field * state and then return that value. * + *

    Matches in longer fields are less precise, so implementations of this + * method usually return smaller values when state.getLength() is large, + * and larger values when state.getLength() is small. + * + *

    Note that the return values are computed under + * {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document)} + * and then stored using + * {@link #encodeNormValue(float)}. + * Thus they have limited precision, and documents + * must be re-indexed if this method is altered. + * *

    For backward compatibility this method by default calls * {@link #lengthNorm(String, int)} passing * {@link FieldInvertState#getLength()} as the second argument, and @@ -587,9 +573,7 @@ public abstract class Similarity implements Serializable { * @param state current processing state for this field * @return the calculated float norm */ - public float computeNorm(String field, FieldInvertState state) { - return (state.getBoost() * lengthNorm(field, state.getLength())); - } + public abstract float computeNorm(String field, FieldInvertState state); /** Computes the normalization value for a field given the total number of * terms contained in a field. These values, together with field boosts, are @@ -613,23 +597,13 @@ public abstract class Similarity implements Serializable { * @return a normalization factor for hits on this field of this document * * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract float lengthNorm(String fieldName, int numTokens); - - /** Computes the normalization value for a query given the sum of the squared - * weights of each of the query terms. This value is multiplied into the - * weight of each query term. While the classic query normalization factor is - * computed as 1/sqrt(sumOfSquaredWeights), other implementations might - * completely ignore sumOfSquaredWeights (ie return 1). * - *
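Since computeNorm is now the single abstract entry point (lengthNorm only throws, see below), an implementation has to fold the field boost and length into one value itself. A hedged sketch, assuming the classic 1/sqrt(length) shape and a made-up class name:

    // Sketch: implementing the now-abstract computeNorm.
    public class ShortFieldBiasedSimilarity extends DefaultSimilarity {
      @Override
      public float computeNorm(String field, FieldInvertState state) {
        // combine the field boost with a classic length normalization
        return state.getBoost() * (float) (1.0 / Math.sqrt(state.getLength()));
      }
    }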

    This does not affect ranking, but the default implementation does make scores - * from different queries more comparable than they would be by eliminating the - * magnitude of the Query vector as a factor in the score. - * - * @param sumOfSquaredWeights the sum of the squares of query term weights - * @return a normalization factor for query weights + * @deprecated Please override computeNorm instead */ - public abstract float queryNorm(float sumOfSquaredWeights); + @Deprecated + public final float lengthNorm(String fieldName, int numTokens) { + throw new UnsupportedOperationException("please use computeNorm instead"); + } /** Encodes a normalization factor for storage in an index. * @@ -641,7 +615,6 @@ public abstract class Similarity implements Serializable { * are rounded down to the largest representable value. Positive values too * small to represent are rounded up to the smallest positive representable * value. - * * @see org.apache.lucene.document.Field#setBoost(float) * @see org.apache.lucene.util.SmallFloat */ @@ -709,11 +682,11 @@ public abstract class Similarity implements Serializable { * idf(docFreq, searcher.maxDoc()); *

    * - * Note that {@link Searcher#maxDoc()} is used instead of + * Note that {@link IndexSearcher#maxDoc()} is used instead of * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because also - * {@link Searcher#docFreq(Term)} is used, and when the latter - * is inaccurate, so is {@link Searcher#maxDoc()}, and in the same direction. - * In addition, {@link Searcher#maxDoc()} is more efficient to compute + * {@link IndexSearcher#docFreq(Term)} is used, and when the latter + * is inaccurate, so is {@link IndexSearcher#maxDoc()}, and in the same direction. + * In addition, {@link IndexSearcher#maxDoc()} is more efficient to compute * * @param term the term in question * @param searcher the document collection being searched @@ -736,16 +709,16 @@ public abstract class Similarity implements Serializable { public float getIdf() { return idf; }}; - } + } /** * This method forwards to {@link - * #idfExplain(Term,Searcher,int)} by passing + * #idfExplain(Term,IndexSearcher,int)} by passing * searcher.docFreq(term) as the docFreq. */ public IDFExplanation idfExplain(final Term term, final IndexSearcher searcher) throws IOException { return idfExplain(term, searcher, searcher.docFreq(term)); - } + } /** * Computes a score factor for a phrase. @@ -801,20 +774,6 @@ public abstract class Similarity implements Serializable { */ public abstract float idf(int docFreq, int numDocs); - /** Computes a score factor based on the fraction of all query terms that a - * document contains. This value is multiplied into scores. - * - *

    The presence of a large portion of the query terms indicates a better - * match with the query, so implementations of this method usually return - * larger values when the ratio between these parameters is large and smaller - * values when the ratio between them is small. - * - * @param overlap the number of query terms matched in the document - * @param maxOverlap the total number of terms in the query - * @return a score factor based on term overlap with the query - */ - public abstract float coord(int overlap, int maxOverlap); - /** * Calculate a scoring factor based on the data in the payload. Overriding implementations * are responsible for interpreting what is in the payload. Lucene makes no assumptions about diff --git a/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java b/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java deleted file mode 100644 index b83800c33e4..00000000000 --- a/lucene/src/java/org/apache/lucene/search/SimilarityDelegator.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.apache.lucene.search; - -import org.apache.lucene.index.FieldInvertState; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Expert: Delegating scoring implementation. Useful in {@link - * Query#getSimilarity(Searcher)} implementations, to override only certain - * methods of a Searcher's Similarity implementation.. */ -public class SimilarityDelegator extends Similarity { - - private Similarity delegee; - - /** Construct a {@link Similarity} that delegates all methods to another. 
- * - * @param delegee the Similarity implementation to delegate to - */ - public SimilarityDelegator(Similarity delegee) { - this.delegee = delegee; - } - - @Override - public float computeNorm(String fieldName, FieldInvertState state) { - return delegee.computeNorm(fieldName, state); - } - - @Override - public float lengthNorm(String fieldName, int numTerms) { - return delegee.lengthNorm(fieldName, numTerms); - } - - @Override - public float queryNorm(float sumOfSquaredWeights) { - return delegee.queryNorm(sumOfSquaredWeights); - } - - @Override - public float tf(float freq) { - return delegee.tf(freq); - } - - @Override - public float sloppyFreq(int distance) { - return delegee.sloppyFreq(distance); - } - - @Override - public float idf(int docFreq, int numDocs) { - return delegee.idf(docFreq, numDocs); - } - - @Override - public float coord(int overlap, int maxOverlap) { - return delegee.coord(overlap, maxOverlap); - } - - @Override - public float scorePayload(int docId, String fieldName, int start, int end, byte [] payload, int offset, int length) { - return delegee.scorePayload(docId, fieldName, start, end, payload, offset, length); - } -} diff --git a/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java b/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java new file mode 100644 index 00000000000..ef9a034e7eb --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Expert: Scoring API. + * + * Provides top-level scoring functions that aren't specific to a field, + * and work across multi-field queries (such as {@link BooleanQuery}). + * + * Field-specific scoring is accomplished through {@link Similarity}. + * + * @lucene.experimental + */ +public interface SimilarityProvider { + + /** Computes a score factor based on the fraction of all query terms that a + * document contains. This value is multiplied into scores. + * + *

    The presence of a large portion of the query terms indicates a better + * match with the query, so implementations of this method usually return + * larger values when the ratio between these parameters is large and smaller + * values when the ratio between them is small. + * + * @param overlap the number of query terms matched in the document + * @param maxOverlap the total number of terms in the query + * @return a score factor based on term overlap with the query + */ + public abstract float coord(int overlap, int maxOverlap); + + /** Computes the normalization value for a query given the sum of the squared + * weights of each of the query terms. This value is multiplied into the + * weight of each query term. While the classic query normalization factor is + * computed as 1/sqrt(sumOfSquaredWeights), other implementations might + * completely ignore sumOfSquaredWeights (ie return 1). + * + *

    This does not affect ranking, but the default implementation does make scores + * from different queries more comparable than they would be by eliminating the + * magnitude of the Query vector as a factor in the score. + * + * @param sumOfSquaredWeights the sum of the squares of query term weights + * @return a normalization factor for query weights + */ + public abstract float queryNorm(float sumOfSquaredWeights); + + /** Returns a {@link Similarity} for scoring a field + * @param field field name. + * @return a field-specific Similarity. + */ + public abstract Similarity get(String field); +} diff --git a/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 42941214d6e..cc9fad302d0 100644 --- a/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -78,7 +78,7 @@ final class SloppyPhraseScorer extends PhraseScorer { int matchLength = end - start; if (matchLength <= slop) - freq += getSimilarity().sloppyFreq(matchLength); // score match + freq += similarity.sloppyFreq(matchLength); // score match if (pp.position > end) end = pp.position; diff --git a/lucene/src/java/org/apache/lucene/search/Sort.java b/lucene/src/java/org/apache/lucene/search/Sort.java index 9c2bd0e257d..7969c991609 100644 --- a/lucene/src/java/org/apache/lucene/search/Sort.java +++ b/lucene/src/java/org/apache/lucene/search/Sort.java @@ -103,7 +103,7 @@ implements Serializable { /** * Represents sorting by computed relevance. Using this sort criteria returns * the same results as calling - * {@link Searcher#search(Query,int) Searcher#search()}without a sort criteria, + * {@link IndexSearcher#search(Query,int) IndexSearcher#search()}without a sort criteria, * only with slightly more overhead. */ public static final Sort RELEVANCE = new Sort(); @@ -116,7 +116,7 @@ implements Serializable { /** * Sorts by computed relevance. This is the same sort criteria as calling - * {@link Searcher#search(Query,int) Searcher#search()}without a sort criteria, + * {@link IndexSearcher#search(Query,int) IndexSearcher#search()}without a sort criteria, * only with slightly more overhead. */ public Sort() { diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index a5bfc761cef..0875ccfa287 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -42,7 +42,7 @@ import org.apache.lucene.util.BytesRef; public class SortField implements Serializable { - /** Sort by document score (relevancy). Sort values are Float and higher + /** Sort by document score (relevance). Sort values are Float and higher * values are at the front. */ public static final int SCORE = 0; @@ -93,7 +93,7 @@ implements Serializable { /** Sort use byte[] index values. */ public static final int BYTES = 12; - /** Represents sorting by document score (relevancy). */ + /** Represents sorting by document score (relevance). */ public static final SortField FIELD_SCORE = new SortField (null, SCORE); /** Represents sorting by document number (index order). 
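Putting the new split together: coord and queryNorm now live on SimilarityProvider, while per-field scoring stays on Similarity and is obtained through get(String field). A hedged sketch of a provider (class name hypothetical; ShortFieldBiasedSimilarity is the sketch given earlier), followed by the wiring suggested by the @see tags above:

    // Sketch: a provider with classic coord/queryNorm and a per-field Similarity.
    public class PerFieldSimilarityProvider implements SimilarityProvider {
      private final Similarity defaultSim = new DefaultSimilarity();
      private final Similarity titleSim = new ShortFieldBiasedSimilarity();

      public float coord(int overlap, int maxOverlap) {
        return overlap / (float) maxOverlap;
      }

      public float queryNorm(float sumOfSquaredWeights) {
        return (float) (1.0 / Math.sqrt(sumOfSquaredWeights));
      }

      public Similarity get(String field) {
        return "title".equals(field) ? titleSim : defaultSim;
      }
    }

    // Wiring, per the @see references above (instances assumed to exist):
    //   indexWriterConfig.setSimilarityProvider(new PerFieldSimilarityProvider());
    //   indexSearcher.setSimilarityProvider(new PerFieldSimilarityProvider());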
*/ diff --git a/lucene/src/java/org/apache/lucene/search/SpanFilter.java b/lucene/src/java/org/apache/lucene/search/SpanFilter.java index e2a5946c166..e46ff1e8242 100644 --- a/lucene/src/java/org/apache/lucene/search/SpanFilter.java +++ b/lucene/src/java/org/apache/lucene/search/SpanFilter.java @@ -15,7 +15,7 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import java.io.IOException; @@ -30,9 +30,9 @@ import java.io.IOException; public abstract class SpanFilter extends Filter{ /** Returns a SpanFilterResult with true for documents which should be permitted in search results, and false for those that should not and Spans for where the true docs match. - * @param reader The {@link org.apache.lucene.index.IndexReader} to load position and DocIdSet information from + * @param context The {@link AtomicReaderContext} to load position and DocIdSet information from * @return A {@link SpanFilterResult} * @throws java.io.IOException if there was an issue accessing the necessary information * */ - public abstract SpanFilterResult bitSpans(IndexReader reader) throws IOException; + public abstract SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java b/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java index 4c8265155ea..b0ccb481e59 100644 --- a/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java +++ b/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java @@ -16,7 +16,7 @@ package org.apache.lucene.search; */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.OpenBitSet; @@ -52,16 +52,16 @@ public class SpanQueryFilter extends SpanFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - SpanFilterResult result = bitSpans(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + SpanFilterResult result = bitSpans(context); return result.getDocIdSet(); } @Override - public SpanFilterResult bitSpans(IndexReader reader) throws IOException { + public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - Spans spans = query.getSpans(reader); + final OpenBitSet bits = new OpenBitSet(context.reader.maxDoc()); + Spans spans = query.getSpans(context); List tmp = new ArrayList(20); int currentDoc = -1; SpanFilterResult.PositionInfo currentInfo = null; diff --git a/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java b/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java index e710b96c145..501831728d3 100644 --- a/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java +++ b/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java @@ -18,8 +18,6 @@ package org.apache.lucene.search; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Comparator; import org.apache.lucene.index.Fields; @@ -27,25 +25,33 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; 
+import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ReaderUtil; abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { + /** Return a suitable top-level Query for holding all expanded terms. */ protected abstract Q getTopLevelQuery() throws IOException; /** Add a MultiTermQuery term to the top-level query */ - protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException { + addClause(topLevel, term, docCount, boost, null); + } + + protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState states) throws IOException; + protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final List subReaders = new ArrayList(); - ReaderUtil.gatherSubReaders(subReaders, reader); + ReaderContext topReaderContext = reader.getTopReaderContext(); Comparator lastTermComp = null; - - for (IndexReader r : subReaders) { - final Fields fields = r.fields(); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext context : leaves) { + final Fields fields = context.reader.fields(); if (fields == null) { // reader has no fields continue; @@ -68,11 +74,10 @@ abstract class TermCollectingRewrite extends MultiTermQuery.Rew if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp) throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp); lastTermComp = newTermComp; - + collector.setReaderContext(topReaderContext, context); collector.setNextEnum(termsEnum); BytesRef bytes; while ((bytes = termsEnum.next()) != null) { - termsEnum.cacheCurrentTerm(); if (!collector.collect(bytes)) return; // interrupt whole term collection, so also don't iterate other subReaders } @@ -80,6 +85,14 @@ abstract class TermCollectingRewrite extends MultiTermQuery.Rew } protected static abstract class TermCollector { + + protected AtomicReaderContext readerContext; + protected ReaderContext topReaderContext; + + public void setReaderContext(ReaderContext topReaderContext, AtomicReaderContext readerContext) { + this.readerContext = readerContext; + this.topReaderContext = topReaderContext; + } /** attributes used for communication with the enum */ public final AttributeSource attributes = new AttributeSource(); diff --git a/lucene/src/java/org/apache/lucene/search/TermQuery.java b/lucene/src/java/org/apache/lucene/search/TermQuery.java index 6eb34c6eab9..cb729ababc5 100644 --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java @@ -21,9 +21,16 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; +import 
org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. @@ -31,19 +38,23 @@ import org.apache.lucene.util.ToStringUtils; */ public class TermQuery extends Query { private final Term term; - private final int docFreq; + private int docFreq; + private transient PerReaderTermState perReaderTermState; private class TermWeight extends Weight { private final Similarity similarity; private float value; - private float idf; + private final float idf; private float queryNorm; private float queryWeight; - private IDFExplanation idfExp; + private final IDFExplanation idfExp; + private transient PerReaderTermState termStates; - public TermWeight(IndexSearcher searcher) + public TermWeight(IndexSearcher searcher, PerReaderTermState termStates, int docFreq) throws IOException { - this.similarity = getSimilarity(searcher); + assert termStates != null : "PerReaderTermState must not be null"; + this.termStates = termStates; + this.similarity = searcher.getSimilarityProvider().get(term.field()); if (docFreq != -1) { idfExp = similarity.idfExplain(term, searcher, docFreq); } else { @@ -75,21 +86,31 @@ public class TermQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), - term.field(), - term.bytes()); - - if (docs == null) { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { + final String field = term.field(); + final IndexReader reader = context.reader; + assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight is not the same as the current reader's top-reader"; + final TermState state = termStates + .get(context.ord); + if (state == null) { // term is not present in that reader + assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader"; return null; } - - return new TermScorer(this, docs, similarity, reader.norms(term.field())); + final DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), field, term.bytes(), state); + assert docs != null; + return new TermScorer(this, docs, similarity, context.reader.norms(field)); } - + + private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { + // only called from assert + final Terms terms = reader.terms(field); + return terms == null || terms.docFreq(bytes) == 0; + } + @Override - public Explanation explain(IndexReader reader, int doc) + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + final IndexReader reader = context.reader; ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); @@ -138,7 +159,7 @@ public class TermQuery extends Query { fieldExpl.addDetail(expl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + final byte[] fieldNorms = reader.norms(field); float fieldNorm = fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); @@ -174,6 +195,17 @@ public class TermQuery extends Query { public TermQuery(Term t, int docFreq) { term = t; this.docFreq = docFreq; + perReaderTermState = null; + } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. 
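The constructor added below lets callers hand in pre-resolved per-segment term statistics. A hedged sketch of that flow, mirroring the PerReaderTermState.build call in createWeight further down (`reader` is an assumed open IndexReader; field and term are made up):

    // Sketch: resolve the term once against the top-level context, then reuse it.
    IndexSearcher searcher = new IndexSearcher(reader);
    Term term = new Term("body", "lucene");
    ReaderContext top = searcher.getTopReaderContext();
    PerReaderTermState states = PerReaderTermState.build(top, term, true); // cache term lookups
    TermQuery query = new TermQuery(term, states);  // uses the cached docFreq and TermStates
    TopDocs hits = searcher.search(query, 10);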
*/ + public TermQuery(Term t, PerReaderTermState states) { + assert states != null; + term = t; + docFreq = states.docFreq(); + perReaderTermState = states; } /** Returns the term of this query. */ @@ -181,7 +213,21 @@ public class TermQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new TermWeight(searcher); + final ReaderContext context = searcher.getTopReaderContext(); + final int weightDocFreq; + final PerReaderTermState termState; + if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { + // make TermQuery single-pass if we don't have a PRTS or if the context differs! + termState = PerReaderTermState.build(context, term, true); // cache term lookups! + // we must not ignore the given docFreq - if set use the given value + weightDocFreq = docFreq == -1 ? termState.docFreq() : docFreq; + } else { + // PRTS was pre-build for this IS + termState = this.perReaderTermState; + weightDocFreq = docFreq; + } + + return new TermWeight(searcher, termState, weightDocFreq); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/TermScorer.java b/lucene/src/java/org/apache/lucene/search/TermScorer.java index 48ddd3e05cd..9a9ef5eeb3c 100644 --- a/lucene/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/src/java/org/apache/lucene/search/TermScorer.java @@ -38,7 +38,8 @@ final class TermScorer extends Scorer { private int[] docs; private int[] freqs; private final DocsEnum.BulkReadResult bulkResult; - + private final Similarity similarity; + /** * Construct a TermScorer. * @@ -53,15 +54,15 @@ final class TermScorer extends Scorer { * The field norms of the document fields for the Term. */ TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) { - super(similarity, weight); - + super(weight); + this.similarity = similarity; this.docsEnum = td; this.norms = norms; this.weightValue = weight.getValue(); bulkResult = td.getBulkResult(); for (int i = 0; i < SCORE_CACHE_SIZE; i++) - scoreCache[i] = getSimilarity().tf(i) * weightValue; + scoreCache[i] = similarity.tf(i) * weightValue; } @Override @@ -77,7 +78,7 @@ final class TermScorer extends Scorer { // firstDocID is ignored since nextDoc() sets 'doc' @Override - protected boolean score(Collector c, int end, int firstDocID) throws IOException { + public boolean score(Collector c, int end, int firstDocID) throws IOException { c.setScorer(this); while (doc < end) { // for docs in window c.collect(doc); // collect score @@ -136,9 +137,9 @@ final class TermScorer extends Scorer { float raw = // compute tf(f)*weight freq < SCORE_CACHE_SIZE // check cache ? scoreCache[freq] // cache hit - : getSimilarity().tf(freq)*weightValue; // cache miss + : similarity.tf(freq)*weightValue; // cache miss - return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field + return norms == null ? 
raw : raw * similarity.decodeNormValue(norms[doc]); // normalize for field } /** diff --git a/lucene/src/java/org/apache/lucene/search/TimeLimitingCollector.java b/lucene/src/java/org/apache/lucene/search/TimeLimitingCollector.java index 405f1a09901..63ad23d9d6c 100644 --- a/lucene/src/java/org/apache/lucene/search/TimeLimitingCollector.java +++ b/lucene/src/java/org/apache/lucene/search/TimeLimitingCollector.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.ThreadInterruptedException; /** @@ -213,9 +213,9 @@ public class TimeLimitingCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int base) throws IOException { - collector.setNextReader(reader, base); - this.docBase = base; + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); + this.docBase = context.docBase; } @Override diff --git a/lucene/src/java/org/apache/lucene/search/TopDocs.java b/lucene/src/java/org/apache/lucene/search/TopDocs.java index c185fbe1a82..6d14f88bb20 100644 --- a/lucene/src/java/org/apache/lucene/search/TopDocs.java +++ b/lucene/src/java/org/apache/lucene/search/TopDocs.java @@ -18,8 +18,8 @@ package org.apache.lucene.search; */ /** Represents hits returned by {@link - * Searcher#search(Query,Filter,int)} and {@link - * Searcher#search(Query,int)}. */ + * IndexSearcher#search(Query,Filter,int)} and {@link + * IndexSearcher#search(Query,int)}. */ public class TopDocs implements java.io.Serializable { /** The total number of hits for the query. */ diff --git a/lucene/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/src/java/org/apache/lucene/search/TopFieldCollector.java index 7c78274bb84..bc10124e90c 100644 --- a/lucene/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.util.PriorityQueue; @@ -92,9 +92,9 @@ public abstract class TopFieldCollector extends TopDocsCollector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - this.docBase = docBase; - queue.setComparator(0, comparator.setNextReader(reader, docBase)); + public void setNextReader(AtomicReaderContext context) throws IOException { + this.docBase = context.docBase; + queue.setComparator(0, comparator.setNextReader(context)); comparator = queue.firstComparator; } @@ -447,10 +447,10 @@ public abstract class TopFieldCollector extends TopDocsCollector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) throws IOException { + docBase = context.docBase; for (int i = 0; i < comparators.length; i++) { - queue.setComparator(i, comparators[i].setNextReader(reader, docBase)); + queue.setComparator(i, comparators[i].setNextReader(context)); } } diff --git a/lucene/src/java/org/apache/lucene/search/TopFieldDocs.java b/lucene/src/java/org/apache/lucene/search/TopFieldDocs.java index ec9750f64cf..fc4c2331c3b 100644 --- 
a/lucene/src/java/org/apache/lucene/search/TopFieldDocs.java +++ b/lucene/src/java/org/apache/lucene/search/TopFieldDocs.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; /** Represents hits returned by {@link - * Searcher#search(Query,Filter,int,Sort)}. + * IndexSearcher#search(Query,Filter,int,Sort)}. */ public class TopFieldDocs extends TopDocs { diff --git a/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 08a6897065a..d8f317592c1 100644 --- a/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; /** * A {@link Collector} implementation that collects the top-scoring hits, @@ -155,8 +155,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { } @Override - public void setNextReader(IndexReader reader, int base) { - docBase = base; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override diff --git a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java index b3e409a8df7..472e99de705 100644 --- a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java +++ b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java @@ -25,9 +25,11 @@ import java.util.Comparator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; /** * Base rewrite method for collecting only the top terms @@ -78,14 +80,14 @@ public abstract class TopTermsRewrite extends TermCollectingRew this.termComp = termsEnum.getComparator(); // lazy init the initial ScoreTerm because comparator is not known on ctor: if (st == null) - st = new ScoreTerm(this.termComp); + st = new ScoreTerm(this.termComp, new PerReaderTermState(topReaderContext)); boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); } @Override - public boolean collect(BytesRef bytes) { + public boolean collect(BytesRef bytes) throws IOException { final float boost = boostAtt.getBoost(); - // ignore uncompetetive hits + // ignore uncompetitive hits if (stQueue.size() == maxSize) { final ScoreTerm t = stQueue.peek(); if (boost < t.boost) @@ -94,23 +96,27 @@ public abstract class TopTermsRewrite extends TermCollectingRew return true; } ScoreTerm t = visitedTerms.get(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (t != null) { // if the term is already in the PQ, only update docFreq of term in PQ - t.docFreq += termsEnum.docFreq(); assert t.boost == boost : "boost should be equal in all segment TermsEnums"; + t.termState.register(state, readerContext.ord, termsEnum.docFreq()); } else { // add new entry in PQ, we must clone the term, else it may get overwritten! 
st.bytes.copy(bytes); st.boost = boost; - st.docFreq = termsEnum.docFreq(); visitedTerms.put(st.bytes, st); + assert st.termState.docFreq() == 0; + st.termState.register(state, readerContext.ord, termsEnum.docFreq()); stQueue.offer(st); // possibly drop entries from queue if (stQueue.size() > maxSize) { st = stQueue.poll(); visitedTerms.remove(st.bytes); + st.termState.clear(); // reset the termstate! } else { - st = new ScoreTerm(termComp); + st = new ScoreTerm(termComp, new PerReaderTermState(topReaderContext)); } assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; // set maxBoostAtt with values to help FuzzyTermsEnum to optimize @@ -120,6 +126,7 @@ public abstract class TopTermsRewrite extends TermCollectingRew maxBoostAtt.setCompetitiveTerm(t.bytes); } } + return true; } }); @@ -130,8 +137,8 @@ public abstract class TopTermsRewrite extends TermCollectingRew ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { final Term term = placeholderTerm.createTerm(st.bytes); - assert reader.docFreq(term) == st.docFreq; - addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query + assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq(); + addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return q; @@ -147,7 +154,7 @@ public abstract class TopTermsRewrite extends TermCollectingRew if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - final TopTermsRewrite other = (TopTermsRewrite) obj; + final TopTermsRewrite other = (TopTermsRewrite) obj; if (size != other.size) return false; return true; } @@ -163,13 +170,12 @@ public abstract class TopTermsRewrite extends TermCollectingRew static final class ScoreTerm implements Comparable { public final Comparator termComp; - public final BytesRef bytes = new BytesRef(); public float boost; - public int docFreq; - - public ScoreTerm(Comparator termComp) { + public final PerReaderTermState termState; + public ScoreTerm(Comparator termComp, PerReaderTermState termState) { this.termComp = termComp; + this.termState = termState; } public int compareTo(ScoreTerm other) { diff --git a/lucene/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/src/java/org/apache/lucene/search/TotalHitCountCollector.java index 444fa67f942..b154091e27d 100644 --- a/lucene/src/java/org/apache/lucene/search/TotalHitCountCollector.java +++ b/lucene/src/java/org/apache/lucene/search/TotalHitCountCollector.java @@ -17,7 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; /** * Just counts the total number of hits. 
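Collectors follow the same per-segment pattern: setNextReader now receives an AtomicReaderContext carrying both the segment reader and its docBase, as the collector hunks above and below show. A hedged sketch of a user collector (class name hypothetical; java.util.List/ArrayList imports assumed):

    // Sketch: collecting global doc ids under the per-segment Collector API.
    public class DocIdCollectingCollector extends Collector {
      private final List<Integer> hits = new ArrayList<Integer>();
      private int docBase;

      @Override public void setScorer(Scorer scorer) { /* scores not needed */ }

      @Override public void collect(int doc) {
        hits.add(docBase + doc);                 // 'doc' is relative to the current segment
      }

      @Override public void setNextReader(AtomicReaderContext context) {
        docBase = context.docBase;               // docBase now comes from the context
      }

      @Override public boolean acceptsDocsOutOfOrder() { return true; }
    }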
@@ -31,16 +31,20 @@ public class TotalHitCountCollector extends Collector { return totalHits; } + @Override public void setScorer(Scorer scorer) { } + @Override public void collect(int doc) { totalHits++; } - public void setNextReader(IndexReader reader, int docBase) { + @Override + public void setNextReader(AtomicReaderContext context) { } + @Override public boolean acceptsDocsOutOfOrder() { return true; } diff --git a/lucene/src/java/org/apache/lucene/search/Weight.java b/lucene/src/java/org/apache/lucene/search/Weight.java index 77a56bf8997..7ea739b7e1e 100644 --- a/lucene/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/src/java/org/apache/lucene/search/Weight.java @@ -21,27 +21,35 @@ import java.io.IOException; import java.io.Serializable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; /** * Expert: Calculate query weights and build query scorers. *

    - * The purpose of {@link Weight} is to ensure searching does not - * modify a {@link Query}, so that a {@link Query} instance can be reused.
    - * {@link Searcher} dependent state of the query should reside in the + * The purpose of {@link Weight} is to ensure searching does not modify a + * {@link Query}, so that a {@link Query} instance can be reused.
    + * {@link IndexSearcher} dependent state of the query should reside in the * {@link Weight}.
    * {@link IndexReader} dependent state should reside in the {@link Scorer}. *

    + * Since {@link Weight} creates {@link Scorer} instances for a given + * {@link AtomicReaderContext} ({@link #scorer(IndexReader.AtomicReaderContext, ScorerContext)}) + * callers must maintain the relationship between the searcher's top-level + * {@link ReaderContext} and the context used to create a {@link Scorer}. + *

    * A Weight is used in the following way: *

      *
    1. A Weight is constructed by a top-level query, given a - * Searcher ({@link Query#createWeight(Searcher)}). + * IndexSearcher ({@link Query#createWeight(IndexSearcher)}). *
    2. The {@link #sumOfSquaredWeights()} method is called on the * Weight to compute the query normalization factor - * {@link Similarity#queryNorm(float)} of the query clauses contained in the + * {@link SimilarityProvider#queryNorm(float)} of the query clauses contained in the * query. *
    3. The query normalization factor is passed to {@link #normalize(float)}. At * this point the weighting is complete. - *
    4. A Scorer is constructed by {@link #scorer(IndexReader,boolean,boolean)}. + *
    5. A Scorer is constructed by + * {@link #scorer(IndexReader.AtomicReaderContext, ScorerContext)}. *
    * * @since 2.9 @@ -51,12 +59,12 @@ public abstract class Weight implements Serializable { /** * An explanation of the score computation for the named document. * - * @param reader sub-reader containing the give doc - * @param doc + * @param context the readers context to create the {@link Explanation} for. + * @param doc the document's id relative to the given context's reader * @return an Explanation for the score - * @throws IOException + * @throws IOException if an {@link IOException} occurs */ - public abstract Explanation explain(IndexReader reader, int doc) throws IOException; + public abstract Explanation explain(AtomicReaderContext context, int doc) throws IOException; /** The query that this concerns. */ public abstract Query getQuery(); @@ -79,25 +87,13 @@ public abstract class Weight implements Serializable { * NOTE: null can be returned if no documents will be scored by this * query. * - * @param reader - * the {@link IndexReader} for which to return the {@link Scorer}. - * @param scoreDocsInOrder - * specifies whether in-order scoring of documents is required. Note - * that if set to false (i.e., out-of-order scoring is required), - * this method can return whatever scoring mode it supports, as every - * in-order scorer is also an out-of-order one. However, an - * out-of-order scorer may not support {@link Scorer#nextDoc()} - * and/or {@link Scorer#advance(int)}, therefore it is recommended to - * request an in-order scorer if use of these methods is required. - * @param topScorer - * if true, {@link Scorer#score(Collector)} will be called; if false, - * {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will - * be called. + * @param context + * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param scorerContext the {@link ScorerContext} holding the scores context variables * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException */ - public abstract Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, - boolean topScorer) throws IOException; + public abstract Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException; /** The sum of squared weights of contained query clauses. */ public abstract float sumOfSquaredWeights() throws IOException; @@ -106,7 +102,7 @@ public abstract class Weight implements Serializable { * Returns true iff this implementation scores docs only out of order. This * method is used in conjunction with {@link Collector}'s * {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and - * {@link #scorer(org.apache.lucene.index.IndexReader, boolean, boolean)} to + * {@link #scorer(IndexReader.AtomicReaderContext, ScorerContext)} to * create a matching {@link Scorer} instance for a given {@link Collector}, or * vice versa. *
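The scorer(...) signature above takes a ScorerContext, the immutable parameter object defined in the next hunk. A hedged sketch of driving per-segment scoring with it (`weight`, `searcher` and `collector` are assumed to exist already):

    // Sketch: ScorerContext modifiers return new instances rather than mutating.
    ScorerContext ctx = ScorerContext.def()      // scoreDocsInOrder=true, topScorer=false
        .topScorer(true);                        // returns a fresh ScorerContext
    for (AtomicReaderContext leaf : ReaderUtil.leaves(searcher.getTopReaderContext())) {
      collector.setNextReader(leaf);
      Scorer scorer = weight.scorer(leaf, ctx);  // may be null if nothing matches in this segment
      if (scorer != null) {
        scorer.score(collector);                 // the topScorer=true path
      }
    }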

@@ -115,4 +111,82 @@ public abstract class Weight implements Serializable { */ public boolean scoresDocsOutOfOrder() { return false; } + /** + * A struct-like class encapsulating a scorer's context variables. + * ScorerContext is a strictly immutable struct that follows a + * create-on-modification pattern. If a context variable changes + * through one of the modifiers like {@link #topScorer(boolean)} a new + * {@link ScorerContext} instance is created. If the modifier call doesn't + change the instance the method call has no effect and the same instance is + returned from the modifier. + * + * @lucene.experimental + */ + public static final class ScorerContext { + + /** + * Specifies whether in-order scoring of documents is required. Note that if + * set to false (i.e., out-of-order scoring is required), this method can + * return whatever scoring mode it supports, as every in-order scorer is + * also an out-of-order one. However, an out-of-order scorer may not support + * {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)}, therefore it + * is recommended to request an in-order scorer if use of these methods is + * required. + */ + public final boolean scoreDocsInOrder; + + /** + * If true, {@link Scorer#score(Collector)} will be called; if + * false, {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will + * be called instead. + */ + public final boolean topScorer; + + + private static final ScorerContext DEFAULT_CONTEXT = new ScorerContext(true, false); + + /** + * Returns a default {@link ScorerContext} template initialized with: + *

      + *
    • {@link #scoreDocsInOrder} = true
    • + *
    • {@link #topScorer} = false
    • + *
    + */ + public static ScorerContext def() { + return DEFAULT_CONTEXT; + } + + private ScorerContext(boolean scoreDocsInOrder, boolean topScorer) { + this.scoreDocsInOrder = scoreDocsInOrder; + this.topScorer = topScorer; + } + + /** + * Creates and returns a copy of this context with the given value for + * {@link #scoreDocsInOrder} and returns a new instance of + * {@link ScorerContext} iff the given value differs from the + * {@link #scoreDocsInOrder}. Otherwise, this method has no effect and + * returns this instance. + */ + public ScorerContext scoreDocsInOrder(boolean scoreDocsInOrder) { + if (this.scoreDocsInOrder == scoreDocsInOrder) { + return this; + } + return new ScorerContext(scoreDocsInOrder, topScorer); + } + + /** + * Creates and returns a copy of this context with the given value for + * {@link #topScorer} and returns a new instance of + * {@link ScorerContext} iff the given value differs from the + * {@link #topScorer}. Otherwise, this method has no effect and + * returns this instance. + */ + public ScorerContext topScorer(boolean topScorer) { + if (this.topScorer == topScorer) { + return this; + } + return new ScorerContext(scoreDocsInOrder, topScorer); + } + } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java index da03f5b92af..6ec93729857 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java @@ -24,6 +24,8 @@ import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; @@ -303,11 +305,6 @@ public class DocTermsIndexCreator extends EntryCreatorWithOptions return term; } - @Override - public void cacheCurrentTerm() throws IOException { - throw new UnsupportedOperationException(); - } - @Override public BytesRef term() throws IOException { return term; @@ -323,6 +320,11 @@ public class DocTermsIndexCreator extends EntryCreatorWithOptions throw new UnsupportedOperationException(); } + @Override + public long totalTermFreq() { + return -1; + } + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { throw new UnsupportedOperationException(); @@ -337,6 +339,19 @@ public class DocTermsIndexCreator extends EntryCreatorWithOptions public Comparator getComparator() throws IOException { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public void seek(BytesRef term, TermState state) throws IOException { + assert state != null && state instanceof OrdTermState; + this.seek(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState state = new OrdTermState(); + state.ord = currentOrd; + return state; + } } } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java b/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java index 0e0daff40cd..362cc83a71e 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java @@ -58,6 +58,7 @@ public abstract class EntryCreator implements Serializable // This can be 
removed //------------------------------------------------------------------------ + @Override public boolean equals(Object obj) { if( obj instanceof EntryCreator ) { return getCacheKey().equals( ((EntryCreator)obj).getCacheKey() ); diff --git a/lucene/src/java/org/apache/lucene/search/cache/package.html b/lucene/src/java/org/apache/lucene/search/cache/package.html new file mode 100644 index 00000000000..1ca0c5ddc44 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/cache/package.html @@ -0,0 +1,25 @@ + + + + + + + +Fieldcache + + diff --git a/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java b/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java index e1e39f3fd21..8a5ba9abf41 100755 --- a/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java +++ b/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java @@ -22,6 +22,7 @@ import java.util.Set; import java.util.Arrays; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; @@ -29,7 +30,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.ToStringUtils; /** @@ -52,7 +52,7 @@ public class CustomScoreQuery extends Query { /** * Create a CustomScoreQuery over input subQuery. - * @param subQuery the sub query whose scored is being customed. Must not be null. + * @param subQuery the sub query whose scored is being customized. Must not be null. */ public CustomScoreQuery(Query subQuery) { this(subQuery, new ValueSourceQuery[0]); @@ -182,13 +182,11 @@ public class CustomScoreQuery extends Query { //=========================== W E I G H T ============================ private class CustomWeight extends Weight { - Similarity similarity; Weight subQueryWeight; Weight[] valSrcWeights; boolean qStrict; public CustomWeight(IndexSearcher searcher) throws IOException { - this.similarity = getSimilarity(searcher); this.subQueryWeight = subQuery.weight(searcher); this.valSrcWeights = new Weight[valSrcQueries.length]; for(int i = 0; i < valSrcQueries.length; i++) { @@ -239,40 +237,40 @@ public class CustomScoreQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { // Pass true for "scoresDocsInOrder", because we // require in-order scoring, even if caller does not, // since we call advance on the valSrcScorers. 
Pass // false for "topScorer" because we will not invoke // score(Collector) on these scorers: - Scorer subQueryScorer = subQueryWeight.scorer(reader, true, false); + Scorer subQueryScorer = subQueryWeight.scorer(context, ScorerContext.def()); if (subQueryScorer == null) { return null; } Scorer[] valSrcScorers = new Scorer[valSrcWeights.length]; for(int i = 0; i < valSrcScorers.length; i++) { - valSrcScorers[i] = valSrcWeights[i].scorer(reader, true, topScorer); + valSrcScorers[i] = valSrcWeights[i].scorer(context, scorerContext.scoreDocsInOrder(true)); } - return new CustomScorer(similarity, reader, this, subQueryScorer, valSrcScorers); + return new CustomScorer(context.reader, this, subQueryScorer, valSrcScorers); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - Explanation explain = doExplain(reader, doc); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + Explanation explain = doExplain(context, doc); return explain == null ? new Explanation(0.0f, "no matching docs") : explain; } - private Explanation doExplain(IndexReader reader, int doc) throws IOException { - Explanation subQueryExpl = subQueryWeight.explain(reader, doc); + private Explanation doExplain(AtomicReaderContext info, int doc) throws IOException { + Explanation subQueryExpl = subQueryWeight.explain(info, doc); if (!subQueryExpl.isMatch()) { return subQueryExpl; } // match Explanation[] valSrcExpls = new Explanation[valSrcWeights.length]; for(int i = 0; i < valSrcWeights.length; i++) { - valSrcExpls[i] = valSrcWeights[i].explain(reader, doc); + valSrcExpls[i] = valSrcWeights[i].explain(info, doc); } - Explanation customExp = CustomScoreQuery.this.getCustomScoreProvider(reader).customExplain(doc,subQueryExpl,valSrcExpls); + Explanation customExp = CustomScoreQuery.this.getCustomScoreProvider(info.reader).customExplain(doc,subQueryExpl,valSrcExpls); float sc = getValue() * customExp.getValue(); Explanation res = new ComplexExplanation( true, sc, CustomScoreQuery.this.toString() + ", product of:"); @@ -302,9 +300,9 @@ public class CustomScoreQuery extends Query { private float vScores[]; // reused in score() to avoid allocating this array for each doc // constructor - private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w, + private CustomScorer(IndexReader reader, CustomWeight w, Scorer subQueryScorer, Scorer[] valSrcScorers) throws IOException { - super(similarity,w); + super(w); this.qWeight = w.getValue(); this.subQueryScorer = subQueryScorer; this.valSrcScorers = valSrcScorers; diff --git a/lucene/src/java/org/apache/lucene/search/function/FieldCacheSource.java b/lucene/src/java/org/apache/lucene/search/function/FieldCacheSource.java index b55ae5ff820..c079ebddb81 100644 --- a/lucene/src/java/org/apache/lucene/search/function/FieldCacheSource.java +++ b/lucene/src/java/org/apache/lucene/search/function/FieldCacheSource.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.function; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldCache; /** @@ -55,8 +56,8 @@ public abstract class FieldCacheSource extends ValueSource { /* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */ @Override - public final DocValues getValues(IndexReader reader) throws IOException { - return getCachedFieldValues(FieldCache.DEFAULT, field, reader); + public 
final DocValues getValues(AtomicReaderContext context) throws IOException { + return getCachedFieldValues(FieldCache.DEFAULT, field, context.reader); } /* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */ diff --git a/lucene/src/java/org/apache/lucene/search/function/MultiValueSource.java b/lucene/src/java/org/apache/lucene/search/function/MultiValueSource.java index 534cd1230b9..b3ec7681ad1 100644 --- a/lucene/src/java/org/apache/lucene/search/function/MultiValueSource.java +++ b/lucene/src/java/org/apache/lucene/search/function/MultiValueSource.java @@ -20,6 +20,9 @@ package org.apache.lucene.search.function; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.util.ReaderUtil; @@ -44,16 +47,17 @@ public final class MultiValueSource extends ValueSource { } @Override - public DocValues getValues(IndexReader reader) throws IOException { - - IndexReader[] subReaders = reader.getSequentialSubReaders(); - if (subReaders != null) { - // This is a composite reader - return new MultiDocValues(subReaders); - } else { + public DocValues getValues(AtomicReaderContext context) throws IOException { // Already an atomic reader -- just delegate - return other.getValues(reader); + return other.getValues(context); + } + + @Override + public DocValues getValues(ReaderContext context) throws IOException { + if (context.isAtomic) { + return getValues((AtomicReaderContext) context); } + return new MultiDocValues(ReaderUtil.leaves(context)); } @Override @@ -78,59 +82,56 @@ public final class MultiValueSource extends ValueSource { private final class MultiDocValues extends DocValues { final DocValues[] docValues; - final int[] docStarts; + final AtomicReaderContext[] leaves; - MultiDocValues(IndexReader[] subReaders) throws IOException { - docValues = new DocValues[subReaders.length]; - docStarts = new int[subReaders.length]; - int base = 0; - for(int i=0;i

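The hunks above move Weight.scorer() from (IndexReader, boolean, boolean) to (AtomicReaderContext, ScorerContext) and make ValueSource.getValues() operate per leaf reader. What follows is a minimal caller-side sketch of that flow, assuming ScorerContext is the nested class on Weight that the unqualified references in CustomScoreQuery resolve against; the class and method names below (LeafScoringSketch, scoreAllLeaves) are illustrative and not part of the patch.

import java.io.IOException;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Weight.ScorerContext;   // assumed location of ScorerContext
import org.apache.lucene.util.ReaderUtil;

/** Minimal sketch (not part of the patch): drive the new per-leaf scorer API. */
public class LeafScoringSketch {

  public static void scoreAllLeaves(IndexSearcher searcher, Query query) throws IOException {
    // The Weight is still built against the top-level searcher...
    final Weight weight = query.weight(searcher);

    // ...but scorers are now requested per atomic (leaf) reader context.
    final AtomicReaderContext[] leaves =
        ReaderUtil.leaves(searcher.getIndexReader().getTopReaderContext());
    for (AtomicReaderContext leaf : leaves) {
      // ScorerContext is immutable: the setters return a new instance only if
      // the requested value differs from the current one, otherwise "this".
      final ScorerContext ctx = ScorerContext.def().scoreDocsInOrder(true);
      final Scorer scorer = weight.scorer(leaf, ctx);
      if (scorer == null) {
        continue; // no matching documents in this segment
      }
      int doc;
      while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // doc is relative to the leaf reader, not the top-level reader.
        System.out.println("leaf doc " + doc + " score " + scorer.score());
      }
    }
  }
}

The copy-on-change setters keep ScorerContext instances cheap to share: asking for a value the context already holds simply returns the same object, as the javadoc in the hunk above spells out.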
    diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index 37bb6c7d32c..35356f30f7d 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -17,7 +17,7 @@ package org.apache.lucene.search.payloads; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.IndexSearcher; @@ -143,10 +143,9 @@ public class PayloadNearQuery extends SpanNearQuery { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, - boolean topScorer) throws IOException { - return new PayloadNearSpanScorer(query.getSpans(reader), this, - similarity, reader.norms(query.getField())); + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { + return new PayloadNearSpanScorer(query.getSpans(context), this, + similarity, context.reader.norms(query.getField())); } } @@ -154,7 +153,6 @@ public class PayloadNearQuery extends SpanNearQuery { Spans spans; protected float payloadScore; private int payloadsSeen; - Similarity similarity = getSimilarity(); protected PayloadNearSpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException { @@ -212,7 +210,7 @@ public class PayloadNearQuery extends SpanNearQuery { payloadsSeen = 0; do { int matchLength = spans.end() - spans.start(); - freq += getSimilarity().sloppyFreq(matchLength); + freq += similarity.sloppyFreq(matchLength); Spans[] spansArr = new Spans[1]; spansArr[0] = spans; getPayloads(spansArr); @@ -221,6 +219,7 @@ public class PayloadNearQuery extends SpanNearQuery { return true; } + @Override public float score() throws IOException { return super.score() diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java index 18629e61f9e..8236e419c74 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java @@ -24,6 +24,8 @@ import java.util.Iterator; import java.util.List; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -38,6 +40,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.util.ReaderUtil; /** * Experimental class to get set of payloads for most standard Lucene queries. 
@@ -48,14 +51,16 @@ import org.apache.lucene.search.spans.Spans; * */ public class PayloadSpanUtil { - private IndexReader reader; + private ReaderContext context; /** - * @param reader + * @param context * that contains doc with payloads to extract + * + * @see IndexReader#getTopReaderContext() */ - public PayloadSpanUtil(IndexReader reader) { - this.reader = reader; + public PayloadSpanUtil(ReaderContext context) { + this.context = context; } /** @@ -169,15 +174,16 @@ public class PayloadSpanUtil { private void getPayloads(Collection payloads, SpanQuery query) throws IOException { - Spans spans = query.getSpans(reader); - - while (spans.next() == true) { - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - for (byte [] bytes : payload) { - payloads.add(bytes); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); + for (AtomicReaderContext atomicReaderContext : leaves) { + final Spans spans = query.getSpans(atomicReaderContext); + while (spans.next() == true) { + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + for (byte [] bytes : payload) { + payloads.add(bytes); + } } - } } } diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 1d251447132..81da6a4adf0 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -17,9 +17,9 @@ package org.apache.lucene.search.payloads; * limitations under the License. */ +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; @@ -74,10 +74,9 @@ public class PayloadTermQuery extends SpanTermQuery { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, - boolean topScorer) throws IOException { - return new PayloadTermSpanScorer((TermSpans) query.getSpans(reader), - this, similarity, reader.norms(query.getField())); + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { + return new PayloadTermSpanScorer((TermSpans) query.getSpans(context), + this, similarity, context.reader.norms(query.getField())); } protected class PayloadTermSpanScorer extends SpanScorer { @@ -101,12 +100,11 @@ public class PayloadTermQuery extends SpanTermQuery { freq = 0.0f; payloadScore = 0; payloadsSeen = 0; - Similarity similarity1 = getSimilarity(); while (more && doc == spans.doc()) { int matchLength = spans.end() - spans.start(); - freq += similarity1.sloppyFreq(matchLength); - processPayload(similarity1); + freq += similarity.sloppyFreq(matchLength); + processPayload(similarity); more = spans.next();// this moves positions to the next match in this // document diff --git a/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 16c88f30dee..88d0f50afc0 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -21,11 +21,11 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.ToStringUtils; /** @@ -92,8 +92,8 @@ public class FieldMaskingSpanQuery extends SpanQuery { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public Spans getSpans(IndexReader reader) throws IOException { - return maskedQuery.getSpans(reader); + public Spans getSpans(AtomicReaderContext context) throws IOException { + return maskedQuery.getSpans(context); } @Override @@ -106,11 +106,6 @@ public class FieldMaskingSpanQuery extends SpanQuery { return maskedQuery.createWeight(searcher); } - @Override - public Similarity getSimilarity(IndexSearcher searcher) { - return maskedQuery.getSimilarity(searcher); - } - @Override public Query rewrite(IndexReader reader) throws IOException { FieldMaskingSpanQuery clone = null; diff --git a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index cfac0a7ce8d..2bc9f87d27f 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -17,7 +17,7 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.ArrayUtil; import java.io.IOException; @@ -77,11 +77,11 @@ public class NearSpansOrdered extends Spans { private SpanNearQuery query; private boolean collectPayloads = true; - public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader) throws IOException { - this(spanNearQuery, reader, true); + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context) throws IOException { + this(spanNearQuery, context, true); } - public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader, boolean collectPayloads) + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, boolean collectPayloads) throws IOException { if (spanNearQuery.getClauses().length < 2) { throw new IllegalArgumentException("Less than 2 clauses: " @@ -94,7 +94,7 @@ public class NearSpansOrdered extends Spans { matchPayload = new LinkedList(); subSpansByDoc = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { - subSpans[i] = clauses[i].getSpans(reader); + subSpans[i] = clauses[i].getSpans(context); subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } query = spanNearQuery; // kept for toString() only. diff --git a/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index a2dd6a6de40..d92740a25c6 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -17,7 +17,7 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.PriorityQueue; import java.io.IOException; @@ -131,7 +131,7 @@ public class NearSpansUnordered extends Spans { } - public NearSpansUnordered(SpanNearQuery query, IndexReader reader) + public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context) throws IOException { this.query = query; this.slop = query.getSlop(); @@ -141,7 +141,7 @@ public class NearSpansUnordered extends Spans { subSpans = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { SpansCell cell = - new SpansCell(clauses[i].getSpans(reader), i); + new SpansCell(clauses[i].getSpans(context), i); ordered.add(cell); subSpans[i] = cell.spans; } diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java index 4c9adafb7cf..b90f08eddad 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java @@ -38,7 +38,7 @@ public class SpanFirstQuery extends SpanPositionRangeQuery { @Override protected AcceptStatus acceptPosition(Spans spans) throws IOException { - assert spans.start() != spans.end(); + assert spans.start() != spans.end() : "start equals end: " + spans.start(); if (spans.start() >= end) return AcceptStatus.NO_AND_ADVANCE; else if (spans.end() <= end) diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index b9fc0bc9301..865e2b1eb46 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -20,12 +20,14 @@ package org.apache.lucene.search.spans; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.BooleanClause.Occur; // javadocs only +import org.apache.lucene.util.PerReaderTermState; /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, @@ -87,7 +89,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - public Spans getSpans(IndexReader reader) throws IOException { + public Spans getSpans(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException("Query should have been rewritten"); } @@ -153,7 +155,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); @@ -202,7 +204,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); diff --git 
a/lucene/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java index 5fce135941b..2b17f627327 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java @@ -72,6 +72,7 @@ public class SpanNearPayloadCheckQuery extends SpanPositionCheckQuery { return AcceptStatus.NO; } + @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); buffer.append("spanPayCheck("); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index d687db2c2d2..4d2dd6dca92 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -27,6 +27,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.ToStringUtils; @@ -116,16 +117,16 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final IndexReader reader) throws IOException { + public Spans getSpans(final AtomicReaderContext context) throws IOException { if (clauses.size() == 0) // optimize 0-clause case - return new SpanOrQuery(getClauses()).getSpans(reader); + return new SpanOrQuery(getClauses()).getSpans(context); if (clauses.size() == 1) // optimize 1-clause case - return clauses.get(0).getSpans(reader); + return clauses.get(0).getSpans(context); return inOrder - ? (Spans) new NearSpansOrdered(this, reader, collectPayloads) - : (Spans) new NearSpansUnordered(this, reader); + ? 
(Spans) new NearSpansOrdered(this, context, collectPayloads) + : (Spans) new NearSpansUnordered(this, context); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index c6e891bf121..65c54017a8a 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.ToStringUtils; @@ -74,12 +75,12 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final IndexReader reader) throws IOException { + public Spans getSpans(final AtomicReaderContext context) throws IOException { return new Spans() { - private Spans includeSpans = include.getSpans(reader); + private Spans includeSpans = include.getSpans(context); private boolean moreInclude = true; - private Spans excludeSpans = exclude.getSpans(reader); + private Spans excludeSpans = exclude.getSpans(context); private boolean moreExclude = excludeSpans.next(); @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 174304fd9dc..2aeeb6dfe1d 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -26,6 +26,7 @@ import java.util.Iterator; import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; @@ -162,9 +163,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final IndexReader reader) throws IOException { + public Spans getSpans(final AtomicReaderContext context) throws IOException { if (clauses.size() == 1) // optimize 1-clause case - return (clauses.get(0)).getSpans(reader); + return (clauses.get(0)).getSpans(context); return new Spans() { private SpanQueue queue = null; @@ -173,7 +174,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { queue = new SpanQueue(clauses.size()); Iterator i = clauses.iterator(); while (i.hasNext()) { - Spans spans = i.next().getSpans(reader); + Spans spans = i.next().getSpans(context); if ( ((target == -1) && spans.next()) || ((target != -1) && spans.skipTo(target))) { queue.add(spans); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java index 69dbc306f19..086dad2f929 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java @@ -74,6 +74,7 @@ public class SpanPayloadCheckQuery extends SpanPositionCheckQuery{ return AcceptStatus.YES; } + @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); buffer.append("spanPayCheck("); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 
a2deeb69731..65ac7c1c0dc 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; @@ -80,8 +81,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public Spans getSpans(final IndexReader reader) throws IOException { - return new PositionCheckSpan(reader); + public Spans getSpans(final AtomicReaderContext context) throws IOException { + return new PositionCheckSpan(context); } @@ -105,8 +106,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected class PositionCheckSpan extends Spans { private Spans spans; - public PositionCheckSpan(IndexReader reader) throws IOException { - spans = match.getSpans(reader); + public PositionCheckSpan(AtomicReaderContext context) throws IOException { + spans = match.getSpans(context); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java index 35c314b8de2..bd1a13a6887 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -19,7 +19,7 @@ package org.apache.lucene.search.spans; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; @@ -28,7 +28,7 @@ import org.apache.lucene.search.Weight; public abstract class SpanQuery extends Query { /** Expert: Returns the matches for this query in an index. Used internally * to search for spans. 
*/ - public abstract Spans getSpans(IndexReader reader) throws IOException; + public abstract Spans getSpans(AtomicReaderContext context) throws IOException; /** Returns the name of the field matched by this query.*/ public abstract String getField(); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java index 1d2d9f50bca..8b309a3df68 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -36,10 +36,12 @@ public class SpanScorer extends Scorer { protected int doc; protected float freq; - + protected final Similarity similarity; + protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException { - super(similarity, weight); + super(weight); + this.similarity = similarity; this.spans = spans; this.norms = norms; this.value = weight.getValue(); @@ -81,7 +83,7 @@ public class SpanScorer extends Scorer { freq = 0.0f; do { int matchLength = spans.end() - spans.start(); - freq += getSimilarity().sloppyFreq(matchLength); + freq += similarity.sloppyFreq(matchLength); more = spans.next(); } while (more && (doc == spans.doc())); return true; @@ -92,8 +94,8 @@ public class SpanScorer extends Scorer { @Override public float score() throws IOException { - float raw = getSimilarity().tf(freq) * value; // raw score - return norms == null? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize + float raw = similarity.tf(freq) * value; // raw score + return norms == null? raw : raw * similarity.decodeNormValue(norms[doc]); // normalize } @Override @@ -109,7 +111,7 @@ public class SpanScorer extends Scorer { int expDoc = advance(doc); float phraseFreq = (expDoc == doc) ? freq : 0.0f; - tfExplanation.setValue(getSimilarity().tf(phraseFreq)); + tfExplanation.setValue(similarity.tf(phraseFreq)); tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")"); return tfExplanation; diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index a016e3db137..33347861fda 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.util.ToStringUtils; @@ -80,7 +81,8 @@ public class SpanTermQuery extends SpanQuery { } @Override - public Spans getSpans(final IndexReader reader) throws IOException { + public Spans getSpans(final AtomicReaderContext context) throws IOException { + final IndexReader reader = context.reader; final DocsAndPositionsEnum postings = reader.termPositionsEnum(reader.getDeletedDocs(), term.field(), term.bytes()); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java index 37451fecb2d..104bacf0a37 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -17,7 +17,7 @@ package org.apache.lucene.search.spans; * limitations under the License. 
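SpanQuery.getSpans() likewise now takes an AtomicReaderContext (see the SpanQuery and SpanTermQuery hunks), and the PayloadSpanUtil change earlier shows the intended usage: resolve the leaves of a top-level ReaderContext with ReaderUtil.leaves() and pull spans per leaf. Below is a minimal sketch of the same pattern; it relies only on the APIs visible in the patch, and the wrapper class and method names are illustrative.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.ReaderUtil;

/** Minimal sketch (not part of the patch): enumerate span matches one leaf reader at a time. */
public class SpansPerLeafSketch {

  public static void printSpans(IndexReader topReader, String field, String text) throws IOException {
    final SpanQuery query = new SpanTermQuery(new Term(field, text));
    // Composite readers must be split into their leaves before asking for spans.
    for (AtomicReaderContext leaf : ReaderUtil.leaves(topReader.getTopReaderContext())) {
      final Spans spans = query.getSpans(leaf);
      while (spans.next()) {
        System.out.println("doc=" + spans.doc()
            + " start=" + spans.start() + " end=" + spans.end());
      }
    }
  }
}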
*/ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.search.Explanation.IDFExplanation; @@ -42,7 +42,7 @@ public class SpanWeight extends Weight { public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { - this.similarity = query.getSimilarity(searcher); + this.similarity = searcher.getSimilarityProvider().get(query.getField()); this.query = query; terms=new HashSet(); @@ -72,13 +72,13 @@ public class SpanWeight extends Weight { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - return new SpanScorer(query.getSpans(reader), this, similarity, reader + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { + return new SpanScorer(query.getSpans(context), this, similarity, context.reader .norms(query.getField())); } @Override - public Explanation explain(IndexReader reader, int doc) + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { ComplexExplanation result = new ComplexExplanation(); @@ -111,12 +111,12 @@ public class SpanWeight extends Weight { fieldExpl.setDescription("fieldWeight("+field+":"+query.toString(field)+ " in "+doc+"), product of:"); - Explanation tfExpl = ((SpanScorer)scorer(reader, true, false)).explain(doc); + Explanation tfExpl = ((SpanScorer)scorer(context, ScorerContext.def())).explain(doc); fieldExpl.addDetail(tfExpl); fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + byte[] fieldNorms = context.reader.norms(field); float fieldNorm = fieldNorms!=null ? 
similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); diff --git a/lucene/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/src/java/org/apache/lucene/search/spans/Spans.java index 2d21e8ef79f..1462f607508 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/Spans.java +++ b/lucene/src/java/org/apache/lucene/search/spans/Spans.java @@ -83,4 +83,5 @@ public abstract class Spans { * @return true if there is a payload available at this position that can be loaded */ public abstract boolean isPayloadAvailable(); + } diff --git a/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java b/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java index 17f97473429..d8ed2c771fc 100644 --- a/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java +++ b/lucene/src/java/org/apache/lucene/store/BufferedIndexInput.java @@ -144,6 +144,68 @@ public abstract class BufferedIndexInput extends IndexInput { } } + @Override + public short readShort() throws IOException { + if (2 <= (bufferLength-bufferPosition)) { + return (short) (((buffer[bufferPosition++] & 0xFF) << 8) | (buffer[bufferPosition++] & 0xFF)); + } else { + return super.readShort(); + } + } + + @Override + public int readInt() throws IOException { + if (4 <= (bufferLength-bufferPosition)) { + return ((buffer[bufferPosition++] & 0xFF) << 24) | ((buffer[bufferPosition++] & 0xFF) << 16) + | ((buffer[bufferPosition++] & 0xFF) << 8) | (buffer[bufferPosition++] & 0xFF); + } else { + return super.readInt(); + } + } + + @Override + public long readLong() throws IOException { + if (8 <= (bufferLength-bufferPosition)) { + final int i1 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) | + ((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff); + final int i2 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) | + ((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff); + return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL); + } else { + return super.readLong(); + } + } + + @Override + public int readVInt() throws IOException { + if (5 <= (bufferLength-bufferPosition)) { + byte b = buffer[bufferPosition++]; + int i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = buffer[bufferPosition++]; + i |= (b & 0x7F) << shift; + } + return i; + } else { + return super.readVInt(); + } + } + + @Override + public long readVLong() throws IOException { + if (9 <= bufferLength-bufferPosition) { + byte b = buffer[bufferPosition++]; + long i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = buffer[bufferPosition++]; + i |= (b & 0x7FL) << shift; + } + return i; + } else { + return super.readVLong(); + } + } + private void refill() throws IOException { long start = bufferStart + bufferPosition; long end = start + bufferSize; diff --git a/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java b/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java new file mode 100644 index 00000000000..dee9c5de073 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java @@ -0,0 +1,111 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @lucene.experimental */ +public final class ByteArrayDataInput extends DataInput { + + private byte[] bytes; + + private int pos; + private int limit; + + // TODO: allow BytesRef (slice) too + public ByteArrayDataInput(byte[] bytes) { + this.bytes = bytes; + } + + public void reset(byte[] bytes) { + reset(bytes, 0, bytes.length); + } + + public int getPosition() { + return pos; + } + + public void reset(byte[] bytes, int offset, int len) { + this.bytes = bytes; + pos = offset; + limit = len; + } + + public boolean eof() { + return pos == limit; + } + + public void skipBytes(int count) { + pos += count; + } + + @Override + public short readShort() { + return (short) (((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF)); + } + + @Override + public int readInt() { + return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16) + | ((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF); + } + + @Override + public long readLong() { + final int i1 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) | + ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff); + final int i2 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) | + ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff); + return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL); + } + + @Override + public int readVInt() { + byte b = bytes[pos++]; + int i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = bytes[pos++]; + i |= (b & 0x7F) << shift; + } + return i; + } + + @Override + public long readVLong() { + byte b = bytes[pos++]; + long i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = bytes[pos++]; + i |= (b & 0x7FL) << shift; + } + return i; + } + + // NOTE: AIOOBE not EOF if you read too much + @Override + public byte readByte() { + assert pos < limit; + return bytes[pos++]; + } + + // NOTE: AIOOBE not EOF if you read too much + @Override + public void readBytes(byte[] b, int offset, int len) { + assert pos + len <= limit; + System.arraycopy(bytes, pos, b, offset, len); + pos += len; + } +} diff --git a/lucene/src/java/org/apache/lucene/store/DataInput.java b/lucene/src/java/org/apache/lucene/store/DataInput.java index d6242b58b0a..943ca4c7514 100644 --- a/lucene/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/src/java/org/apache/lucene/store/DataInput.java @@ -79,7 +79,7 @@ public abstract class DataInput implements Cloneable { * supported. * @see DataOutput#writeVInt(int) */ - public final int readVInt() throws IOException { + public int readVInt() throws IOException { byte b = readByte(); int i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { @@ -99,7 +99,7 @@ public abstract class DataInput implements Cloneable { /** Reads a long stored in variable-length format. Reads between one and * nine bytes. Smaller values take fewer bytes. Negative numbers are not * supported. 
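The BufferedIndexInput overrides and the new ByteArrayDataInput above decode the same variable-length integer format this DataInput javadoc describes: seven payload bits per byte, with the high bit marking a continuation byte. The standalone sketch below round-trips one value to make the format concrete; it is plain Java, not part of the patch, and the encode half mirrors what DataOutput.writeVInt does.

import java.io.ByteArrayOutputStream;

/** Standalone sketch (not part of the patch) of the VInt format decoded above. */
public class VIntSketch {

  static byte[] encodeVInt(int value) {
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    while ((value & ~0x7F) != 0) {        // more than 7 significant bits remain
      out.write((value & 0x7F) | 0x80);   // low 7 bits, continuation bit set
      value >>>= 7;
    }
    out.write(value);                     // final byte, continuation bit clear
    return out.toByteArray();
  }

  static int decodeVInt(byte[] bytes) {
    int pos = 0;
    byte b = bytes[pos++];
    int i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {   // same loop as readVInt above
      b = bytes[pos++];
      i |= (b & 0x7F) << shift;
    }
    return i;
  }

  public static void main(String[] args) {
    final int value = 300;                // encodes to two bytes: 0xAC 0x02
    final byte[] encoded = encodeVInt(value);
    System.out.println(encoded.length + " bytes, decoded=" + decodeVInt(encoded));
  }
}

A value below 128 fits in a single byte, and the worst case is five bytes for an int and nine for a long, which is why the buffered fast paths above only engage when at least five (readVInt) or nine (readVLong) bytes remain in the buffer.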
*/ - public final long readVLong() throws IOException { + public long readVLong() throws IOException { byte b = readByte(); long i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { diff --git a/lucene/src/java/org/apache/lucene/store/FSLockFactory.java b/lucene/src/java/org/apache/lucene/store/FSLockFactory.java index bd705892adf..1bca363b088 100644 --- a/lucene/src/java/org/apache/lucene/store/FSLockFactory.java +++ b/lucene/src/java/org/apache/lucene/store/FSLockFactory.java @@ -33,7 +33,7 @@ public abstract class FSLockFactory extends LockFactory { /** * Set the lock directory. This method can be only called * once to initialize the lock directory. It is used by {@link FSDirectory} - * to set the lock directory to itsself. + * to set the lock directory to itself. * Subclasses can also use this method to set the directory * in the constructor. */ diff --git a/lucene/src/java/org/apache/lucene/store/NativeFSLockFactory.java b/lucene/src/java/org/apache/lucene/store/NativeFSLockFactory.java index 53c30a65651..f4f63e6d2fc 100755 --- a/lucene/src/java/org/apache/lucene/store/NativeFSLockFactory.java +++ b/lucene/src/java/org/apache/lucene/store/NativeFSLockFactory.java @@ -60,7 +60,7 @@ public class NativeFSLockFactory extends FSLockFactory { * Create a NativeFSLockFactory instance, with null (unset) * lock directory. When you pass this factory to a {@link FSDirectory} * subclass, the lock directory is automatically set to the - * directory itsself. Be sure to create one instance for each directory + * directory itself. Be sure to create one instance for each directory * your create! */ public NativeFSLockFactory() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/store/RAMInputStream.java b/lucene/src/java/org/apache/lucene/store/RAMInputStream.java index 15f87d5e595..b898f7b38dd 100644 --- a/lucene/src/java/org/apache/lucene/store/RAMInputStream.java +++ b/lucene/src/java/org/apache/lucene/store/RAMInputStream.java @@ -83,6 +83,7 @@ class RAMInputStream extends IndexInput implements Cloneable { } private final void switchCurrentBuffer(boolean enforceEOF) throws IOException { + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; if (currentBufferIndex >= file.numBuffers()) { // end of file reached, no more buffers left if (enforceEOF) @@ -95,7 +96,6 @@ class RAMInputStream extends IndexInput implements Cloneable { } else { currentBuffer = file.getBuffer(currentBufferIndex); bufferPosition = 0; - bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; long buflen = length - bufferStart; bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int) buflen; } diff --git a/lucene/src/java/org/apache/lucene/store/SimpleFSLockFactory.java b/lucene/src/java/org/apache/lucene/store/SimpleFSLockFactory.java index dc8d73fe390..1f532aa509f 100755 --- a/lucene/src/java/org/apache/lucene/store/SimpleFSLockFactory.java +++ b/lucene/src/java/org/apache/lucene/store/SimpleFSLockFactory.java @@ -57,7 +57,7 @@ public class SimpleFSLockFactory extends FSLockFactory { * Create a SimpleFSLockFactory instance, with null (unset) * lock directory. When you pass this factory to a {@link FSDirectory} * subclass, the lock directory is automatically set to the - * directory itsself. Be sure to create one instance for each directory + * directory itself. Be sure to create one instance for each directory * your create! 
*/ public SimpleFSLockFactory() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java index 23d38978c33..31a415cfe71 100644 --- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java @@ -647,7 +647,7 @@ public final class ArrayUtil { /** * Sorts the given array slice using the {@link Comparator}. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small arrays! + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! * @param fromIndex start index (inclusive) * @param toIndex end index (exclusive) */ @@ -657,7 +657,7 @@ public final class ArrayUtil { /** * Sorts the given array using the {@link Comparator}. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small arrays! + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! */ public static void insertionSort(T[] a, Comparator comp) { insertionSort(a, 0, a.length, comp); @@ -665,7 +665,7 @@ public final class ArrayUtil { /** * Sorts the given array slice in natural order. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small arrays! + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! * @param fromIndex start index (inclusive) * @param toIndex end index (exclusive) */ @@ -675,7 +675,7 @@ public final class ArrayUtil { /** * Sorts the given array in natural order. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small arrays! + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! */ public static > void insertionSort(T[] a) { insertionSort(a, 0, a.length); diff --git a/lucene/src/java/org/apache/lucene/util/AttributeImpl.java b/lucene/src/java/org/apache/lucene/util/AttributeImpl.java index c8bf649b6bf..d22491bf2c6 100644 --- a/lucene/src/java/org/apache/lucene/util/AttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/util/AttributeImpl.java @@ -20,6 +20,8 @@ package org.apache.lucene.util; import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.lang.ref.WeakReference; +import java.util.LinkedList; /** * Base class for Attributes that can be added to a @@ -37,71 +39,79 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribut public abstract void clear(); /** - * The default implementation of this method accesses all declared - * fields of this object and prints the values in the following syntax: + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: * - *
    -   *   public String toString() {
    -   *     return "start=" + startOffset + ",end=" + endOffset;
    -   *   }
-   * </pre>
    - * - * This method may be overridden by subclasses. + *
      + *
<li>iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"} + *
<li>iff {@code prependAttClass=false}: {@code "key=value,key=value"} + *
    + * + * @see #reflectWith(AttributeReflector) */ - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - Class clazz = this.getClass(); - Field[] fields = clazz.getDeclaredFields(); - try { - for (int i = 0; i < fields.length; i++) { - Field f = fields[i]; - if (Modifier.isStatic(f.getModifiers())) continue; - f.setAccessible(true); - Object value = f.get(this); - if (buffer.length()>0) { + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { buffer.append(','); } - if (value == null) { - buffer.append(f.getName() + "=null"); - } else { - buffer.append(f.getName() + "=" + value); + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this attribute holds to the given {@link AttributeReflector}. + * + *

    The default implementation calls {@link AttributeReflector#reflect} for all + * non-static fields from the implementing class, using the field name as key + * and the field value as value. The Attribute class is also determined by reflection. + * Please note that the default implementation can only handle single-Attribute + * implementations. + * + *

    Custom implementations look like this (e.g. for a combined attribute implementation): + *

    +   *   public void reflectWith(AttributeReflector reflector) {
    +   *     reflector.reflect(CharTermAttribute.class, "term", term());
    +   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
    +   *   }
+   * </pre>
    + * + *

    If you implement this method, make sure that for each invocation, the same set of {@link Attribute} + * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly + * different values. So don't automatically exclude e.g. {@code null} properties! + * + * @see #reflectAsString(boolean) + */ + public void reflectWith(AttributeReflector reflector) { + final Class clazz = this.getClass(); + final LinkedList>> interfaces = AttributeSource.getAttributeInterfaces(clazz); + if (interfaces.size() != 1) { + throw new UnsupportedOperationException(clazz.getName() + + " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this."); + } + final Class interf = interfaces.getFirst().get(); + final Field[] fields = clazz.getDeclaredFields(); + try { + for (int i = 0; i < fields.length; i++) { + final Field f = fields[i]; + if (Modifier.isStatic(f.getModifiers())) continue; + f.setAccessible(true); + reflector.reflect(interf, f.getName(), f.get(this)); } } catch (IllegalAccessException e) { // this should never happen, because we're just accessing fields // from 'this' throw new RuntimeException(e); } - - return buffer.toString(); } - /** - * Subclasses must implement this method and should compute - * a hashCode similar to this: - *

    -   *   public int hashCode() {
    -   *     int code = startOffset;
    -   *     code = code * 31 + endOffset;
    -   *     return code;
    -   *   }
-   * </pre>
    - * - * see also {@link #equals(Object)} - */ - @Override - public abstract int hashCode(); - - /** - * All values used for computation of {@link #hashCode()} - * should be checked here for equality. - * - * see also {@link Object#equals(Object)} - */ - @Override - public abstract boolean equals(Object other); - /** * Copies the values from this Attribute into the passed-in * target attribute. The target implementation must support all the diff --git a/lucene/src/java/org/apache/lucene/util/AttributeReflector.java b/lucene/src/java/org/apache/lucene/util/AttributeReflector.java new file mode 100644 index 00000000000..c64d04cbc9f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/AttributeReflector.java @@ -0,0 +1,34 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This interface is used to reflect contents of {@link AttributeSource} or {@link AttributeImpl}. + */ +public interface AttributeReflector { + + /** + * This method gets called for every property in an {@link AttributeImpl}/{@link AttributeSource} + * passing the class name of the {@link Attribute}, a key and the actual value. + * E.g., an invocation of {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith} + * would call this method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class} + * as attribute class, {@code "term"} as key and the actual value as a String. + */ + public void reflect(Class attClass, String key, Object value); + +} diff --git a/lucene/src/java/org/apache/lucene/util/AttributeSource.java b/lucene/src/java/org/apache/lucene/util/AttributeSource.java index 1af3763eb6c..c76638f482a 100644 --- a/lucene/src/java/org/apache/lucene/util/AttributeSource.java +++ b/lucene/src/java/org/apache/lucene/util/AttributeSource.java @@ -180,20 +180,9 @@ public class AttributeSource { private static final WeakHashMap,LinkedList>>> knownImplClasses = new WeakHashMap,LinkedList>>>(); - /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. - *

    Please note: It is not guaranteed, that att is added to - * the AttributeSource, because the provided attributes may already exist. - * You should always retrieve the wanted attributes using {@link #getAttribute} after adding - * with this method and cast to your class. - * The recommended way to use custom implementations is using an {@link AttributeFactory}. - *

    - */ - public void addAttributeImpl(final AttributeImpl att) { - final Class clazz = att.getClass(); - if (attributeImpls.containsKey(clazz)) return; - LinkedList>> foundInterfaces; + static LinkedList>> getAttributeInterfaces(final Class clazz) { synchronized(knownImplClasses) { - foundInterfaces = knownImplClasses.get(clazz); + LinkedList>> foundInterfaces = knownImplClasses.get(clazz); if (foundInterfaces == null) { // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), // so all WeakReferences are never evicted by GC @@ -210,7 +199,23 @@ public class AttributeSource { actClazz = actClazz.getSuperclass(); } while (actClazz != null); } + return foundInterfaces; } + } + + /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. + *

    Please note: It is not guaranteed, that att is added to + * the AttributeSource, because the provided attributes may already exist. + * You should always retrieve the wanted attributes using {@link #getAttribute} after adding + * with this method and cast to your class. + * The recommended way to use custom implementations is using an {@link AttributeFactory}. + *

    + */ + public final void addAttributeImpl(final AttributeImpl att) { + final Class clazz = att.getClass(); + if (attributeImpls.containsKey(clazz)) return; + final LinkedList>> foundInterfaces = + getAttributeInterfaces(clazz); // add all interfaces of this AttributeImpl to the maps for (WeakReference> curInterfaceRef : foundInterfaces) { @@ -233,7 +238,7 @@ public class AttributeSource { * already in this AttributeSource and returns it. Otherwise a * new instance is created, added to this AttributeSource and returned. */ - public A addAttribute(Class attClass) { + public final A addAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) { @@ -248,7 +253,7 @@ public class AttributeSource { } /** Returns true, iff this AttributeSource has any attributes */ - public boolean hasAttributes() { + public final boolean hasAttributes() { return !this.attributes.isEmpty(); } @@ -256,7 +261,7 @@ public class AttributeSource { * The caller must pass in a Class<? extends Attribute> value. * Returns true, iff this AttributeSource contains the passed-in Attribute. */ - public boolean hasAttribute(Class attClass) { + public final boolean hasAttribute(Class attClass) { return this.attributes.containsKey(attClass); } @@ -271,7 +276,7 @@ public class AttributeSource { * available. If you want to only use the attribute, if it is available (to optimize * consuming), use {@link #hasAttribute}. */ - public A getAttribute(Class attClass) { + public final A getAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'."); @@ -319,7 +324,7 @@ public class AttributeSource { * Resets all Attributes in this AttributeSource by calling * {@link AttributeImpl#clear()} on each Attribute implementation. */ - public void clearAttributes() { + public final void clearAttributes() { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); @@ -334,7 +339,7 @@ public class AttributeSource { * Captures the state of all Attributes. The return value can be passed to * {@link #restoreState} to restore the state of this or another AttributeSource. */ - public State captureState() { + public final State captureState() { if (!hasAttributes()) { return null; } @@ -360,7 +365,7 @@ public class AttributeSource { * reset its value to the default, in which case the caller should first * call {@link TokenStream#clearAttributes()} on the targetStream. */ - public void restoreState(State state) { + public final void restoreState(State state) { if (state == null) return; do { @@ -431,21 +436,53 @@ public class AttributeSource { return false; } - @Override - public String toString() { - StringBuilder sb = new StringBuilder().append('('); + /** + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: + * + *
      + *
    • iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"} + *
    • iff {@code prependAttClass=false}: {@code "key=value,key=value"} + *
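    For example, an AttributeSource holding a single CharTermAttribute with the term "lucene" would print roughly "term=lucene" (or the same pair prefixed with the attribute interface's class name when prependAttClass=true); the exact keys are defined by each AttributeImpl's reflectWith implementation. A small sketch, assuming the CharTermAttribute from the analysis package:

        AttributeSource source = new AttributeSource();
        CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
        term.append("lucene");
        System.out.println(source.reflectAsString(false));     // e.g. "term=lucene"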
    + * + * @see #reflectWith(AttributeReflector) + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this AttributeSource holds to the given {@link AttributeReflector}. + * + *

    This method iterates over all Attribute implementations and calls the + * corresponding {@link AttributeImpl#reflectWith} method.

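    Callers that want structured access rather than a single string can pass their own reflector. A minimal sketch (java.util imports omitted; the map-based consumer is illustrative only, and source stands for any AttributeSource, for example a TokenStream):

        final Map<String, Object> values = new HashMap<String, Object>();
        source.reflectWith(new AttributeReflector() {
          public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            values.put(attClass.getName() + '#' + key, value);  // mirrors the prependAttClass=true format
          }
        });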
    + * + * @see AttributeImpl#reflectWith + */ + public final void reflectWith(AttributeReflector reflector) { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); } for (State state = currentState; state != null; state = state.next) { - if (state != currentState) sb.append(','); - sb.append(state.attribute.toString()); + state.attribute.reflectWith(reflector); } } - return sb.append(')').toString(); } - + /** * Performs a clone of all {@link AttributeImpl} instances returned in a new * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream @@ -453,7 +490,7 @@ public class AttributeSource { * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look * into / modify the captured state. */ - public AttributeSource cloneAttributes() { + public final AttributeSource cloneAttributes() { final AttributeSource clone = new AttributeSource(this.factory); if (hasAttributes()) { diff --git a/lucene/src/java/org/apache/lucene/util/BytesRef.java b/lucene/src/java/org/apache/lucene/util/BytesRef.java index 342cc6d7aab..16dcc08499c 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRef.java @@ -210,6 +210,7 @@ public final class BytesRef implements Comparable, Externalizable { } /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */ + @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append('['); diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java index c5b180cfa44..f4278b6a068 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java @@ -38,7 +38,7 @@ import org.apache.lucene.util.ByteBlockPool.DirectAllocator; *

    * Note: The maximum capacity {@link BytesRef} instance passed to * {@link #add(BytesRef)} must not be longer than {@link ByteBlockPool#BYTE_BLOCK_SIZE}-2. - * The internal storage is limited to 2GB totalbyte storage. + * The internal storage is limited to 2GB total byte storage. *

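    These limits matter for direct users of the class. A rough usage sketch (the default constructor and the negative-return convention of add() are assumed from the class's javadocs):

        BytesRefHash hash = new BytesRefHash();
        // Each added BytesRef must be shorter than ByteBlockPool.BYTE_BLOCK_SIZE - 2 bytes.
        int ord = hash.add(new BytesRef("lucene"));
        if (ord < 0) {
          ord = -ord - 1;   // the bytes were already present; recover the existing ord
        }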
    * * @lucene.internal diff --git a/lucene/src/java/org/apache/lucene/util/CollectionUtil.java b/lucene/src/java/org/apache/lucene/util/CollectionUtil.java index 7e60fd19b43..ff2a76a6b87 100644 --- a/lucene/src/java/org/apache/lucene/util/CollectionUtil.java +++ b/lucene/src/java/org/apache/lucene/util/CollectionUtil.java @@ -140,7 +140,7 @@ public final class CollectionUtil { /** * Sorts the given random access {@link List} using the {@link Comparator}. * The list must implement {@link RandomAccess}. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small lists! + * algorithm. It is only recommended to use this algorithm for partially sorted small lists! * @throws IllegalArgumentException if list is e.g. a linked list without random access. */ public static void insertionSort(List list, Comparator comp) { @@ -150,7 +150,7 @@ public final class CollectionUtil { /** * Sorts the given random access {@link List} in natural order. * The list must implement {@link RandomAccess}. This method uses the insertion sort - * algorithm. It is only recommened to use this algorithm for partially sorted small lists! + * algorithm. It is only recommended to use this algorithm for partially sorted small lists! * @throws IllegalArgumentException if list is e.g. a linked list without random access. */ public static > void insertionSort(List list) { diff --git a/lucene/src/java/org/apache/lucene/util/Constants.java b/lucene/src/java/org/apache/lucene/util/Constants.java index c991eb62366..13b67c9f69a 100644 --- a/lucene/src/java/org/apache/lucene/util/Constants.java +++ b/lucene/src/java/org/apache/lucene/util/Constants.java @@ -70,6 +70,9 @@ public final class Constants { return s.toString(); } + // NOTE: we track per-segment version as a String with the "X.Y" format, e.g. + // "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep + // the format. public static final String LUCENE_MAIN_VERSION = ident("4.0"); public static final String LUCENE_VERSION; diff --git a/lucene/src/java/org/apache/lucene/util/DoubleBarrelLRUCache.java b/lucene/src/java/org/apache/lucene/util/DoubleBarrelLRUCache.java index a0dd7c19f08..a476bd2b0f8 100644 --- a/lucene/src/java/org/apache/lucene/util/DoubleBarrelLRUCache.java +++ b/lucene/src/java/org/apache/lucene/util/DoubleBarrelLRUCache.java @@ -45,6 +45,7 @@ import java.util.Map; final public class DoubleBarrelLRUCache { public static abstract class CloneableKey { + @Override abstract public Object clone(); } @@ -73,7 +74,7 @@ final public class DoubleBarrelLRUCache { } } + @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append('['); diff --git a/lucene/src/java/org/apache/lucene/util/MapBackedSet.java b/lucene/src/java/org/apache/lucene/util/MapBackedSet.java new file mode 100644 index 00000000000..9db05ec86ba --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/MapBackedSet.java @@ -0,0 +1,73 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; +import java.util.AbstractSet; +import java.util.Iterator; +import java.util.Map; + +/** + * A Set implementation that wraps an actual Map based + * implementation. + * + * @lucene.internal + */ +public final class MapBackedSet extends AbstractSet implements Serializable { + + private static final long serialVersionUID = -6761513279741915432L; + + private final Map map; + + /** + * Creates a new instance which wraps the specified {@code map}. + */ + public MapBackedSet(Map map) { + this.map = map; + } + + @Override + public int size() { + return map.size(); + } + + @Override + public boolean contains(Object o) { + return map.containsKey(o); + } + + @Override + public boolean add(E o) { + return map.put(o, Boolean.TRUE) == null; + } + + @Override + public boolean remove(Object o) { + return map.remove(o) != null; + } + + @Override + public void clear() { + map.clear(); + } + + @Override + public Iterator iterator() { + return map.keySet().iterator(); + } +} diff --git a/lucene/src/java/org/apache/lucene/util/NumericUtils.java b/lucene/src/java/org/apache/lucene/util/NumericUtils.java index 0a08f95e60d..232461ddcf5 100644 --- a/lucene/src/java/org/apache/lucene/util/NumericUtils.java +++ b/lucene/src/java/org/apache/lucene/util/NumericUtils.java @@ -22,8 +22,6 @@ import org.apache.lucene.document.NumericField; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; // for javadocs -// TODO: Remove the commented out methods before release! - /** * This is a helper class to generate prefix-encoded representations for numerical values * and supplies converters to represent float/double values as sortable integers/longs. diff --git a/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java b/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java new file mode 100644 index 00000000000..81ad4ce2f3b --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java @@ -0,0 +1,148 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.TermsEnum.SeekStatus; + +/** + * Maintains a {@link IndexReader} {@link TermState} view over + * {@link IndexReader} instances containing a single term. The + * {@link PerReaderTermState} doesn't track if the given {@link TermState} + * objects are valid, neither if the {@link TermState} instances refer to the + * same terms in the associated readers. + * + * @lucene.experimental + */ +public final class PerReaderTermState { + public final ReaderContext topReaderContext; // for asserting! + private final TermState[] states; + private int docFreq; + + /** + * Creates an empty {@link PerReaderTermState} from a {@link ReaderContext} + */ + public PerReaderTermState(ReaderContext context) { + assert context != null && context.isTopLevel; + topReaderContext = context; + docFreq = 0; + final int len; + if (context.leaves() == null) { + len = 1; + } else { + len = context.leaves().length; + } + states = new TermState[len]; + } + + /** + * Creates a {@link PerReaderTermState} with an initial {@link TermState}, + * {@link IndexReader} pair. + */ + public PerReaderTermState(ReaderContext context, TermState state, int ord, int docFreq) { + this(context); + register(state, ord, docFreq); + } + + /** + * Creates a {@link PerReaderTermState} from a top-level {@link ReaderContext} and the + * given {@link Term}. This method will lookup the given term in all context's leaf readers + * and register each of the readers containing the term in the returned {@link PerReaderTermState} + * using the leaf reader's ordinal. + *

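    Assuming the reader exposes its top-level context via getTopReaderContext() (an accessor assumed here, not shown in this section) and an arbitrary field/term pair, typical usage might look like:

        ReaderContext top = reader.getTopReaderContext();       // must be a top-level context
        PerReaderTermState termState = PerReaderTermState.build(top, new Term("body", "lucene"), true);
        int df = termState.docFreq();                            // doc freq accumulated over all leaves containing the term
        TermState leafState = termState.get(0);                  // null if leaf ordinal 0 has no entry for this term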
    + * Note: the given context must be a top-level context. + */ + public static PerReaderTermState build(ReaderContext context, Term term, boolean cache) + throws IOException { + assert context != null && context.isTopLevel; + final String field = term.field(); + final BytesRef bytes = term.bytes(); + final PerReaderTermState perReaderTermState = new PerReaderTermState(context); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); + for (int i = 0; i < leaves.length; i++) { + final Fields fields = leaves[i].reader.fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! + if (SeekStatus.FOUND == termsEnum.seek(bytes, cache)) { + final TermState termState = termsEnum.termState(); + perReaderTermState.register(termState, leaves[i].ord, termsEnum.docFreq()); + } + } + } + } + return perReaderTermState; + } + + /** + * Clears the {@link PerReaderTermState} internal state and removes all + * registered {@link TermState}s + */ + public void clear() { + docFreq = 0; + Arrays.fill(states, null); + } + + /** + * Registers and associates a {@link TermState} with an leaf ordinal. The leaf ordinal + * should be derived from a {@link ReaderContext}'s leaf ord. + */ + public void register(TermState state, final int ord, final int docFreq) { + assert state != null : "state must not be null"; + assert ord >= 0 && ord < states.length; + assert states[ord] == null : "state for ord: " + ord + + " already registered"; + this.docFreq += docFreq; + states[ord] = state; + } + + /** + * Returns the {@link TermState} for an leaf ordinal or null if no + * {@link TermState} for the ordinal was registered. + * + * @param ord + * the readers leaf ordinal to get the {@link TermState} for. + * @return the {@link TermState} for the given readers ord or null if no + * {@link TermState} for the reader was registered + */ + public TermState get(int ord) { + assert ord >= 0 && ord < states.length; + return states[ord]; + } + + /** + * Returns the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int, int)}. + * @return the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int, int)}. + */ + public int docFreq() { + return docFreq; + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java index 430fc9bf38e..8d772880d0b 100644 --- a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java @@ -22,6 +22,9 @@ import java.util.List; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.CompositeReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; /** * Common util methods for dealing with {@link IndexReader}s. 
@@ -44,6 +47,7 @@ public final class ReaderUtil { this.readerIndex = readerIndex; } + @Override public String toString() { return "slice start=" + start + " length=" + length + " readerIndex=" + readerIndex; } @@ -148,7 +152,94 @@ public final class ReaderUtil { .toArray(new IndexReader[subReadersList.size()]); return subReaders[subIndex]; } + + public static ReaderContext buildReaderContext(IndexReader reader) { + return new ReaderContextBuilder(reader).build(); + } + + public static class ReaderContextBuilder { + private final IndexReader reader; + private final AtomicReaderContext[] leaves; + private int leafOrd = 0; + private int leafDocBase = 0; + public ReaderContextBuilder(IndexReader reader) { + this.reader = reader; + leaves = new AtomicReaderContext[numLeaves(reader)]; + } + + public ReaderContext build() { + return build(null, reader, 0, 0); + } + + private ReaderContext build(CompositeReaderContext parent, IndexReader reader, int ord, int docBase) { + IndexReader[] sequentialSubReaders = reader.getSequentialSubReaders(); + if (sequentialSubReaders == null) { + AtomicReaderContext atomic = new AtomicReaderContext(parent, reader, ord, docBase, leafOrd, leafDocBase); + leaves[leafOrd++] = atomic; + leafDocBase += reader.maxDoc(); + return atomic; + } else { + ReaderContext[] children = new ReaderContext[sequentialSubReaders.length]; + final CompositeReaderContext newParent; + if (parent == null) { + newParent = new CompositeReaderContext(reader, children, leaves); + } else { + newParent = new CompositeReaderContext(parent, reader, ord, docBase, children); + } + + int newDocBase = 0; + for (int i = 0; i < sequentialSubReaders.length; i++) { + build(newParent, sequentialSubReaders[i], i, newDocBase); + newDocBase += sequentialSubReaders[i].maxDoc(); + } + return newParent; + } + } + + private int numLeaves(IndexReader reader) { + final int[] numLeaves = new int[1]; + try { + new Gather(reader) { + @Override + protected void add(int base, IndexReader r) { + numLeaves[0]++; + } + }.run(); + } catch (IOException ioe) { + // won't happen + throw new RuntimeException(ioe); + } + return numLeaves[0]; + } + + } + /** + * Returns the context's leaves or the context itself as the only element of + * the returned array. If the context's #leaves() method returns + * null the given context must be an instance of + * {@link AtomicReaderContext} + */ + public static AtomicReaderContext[] leaves(ReaderContext context) { + assert context != null && context.isTopLevel : "context must be non-null & top-level"; + final AtomicReaderContext[] leaves = context.leaves(); + if (leaves == null) { + assert context.isAtomic : "top-level context without leaves must be atomic"; + return new AtomicReaderContext[] { (AtomicReaderContext) context }; + } + return leaves; + } + + /** + * Walks up the reader tree and return the given context's top level reader + * context, or in other words the reader tree's root context. + */ + public static ReaderContext getTopLevelContext(ReaderContext context) { + while (context.parent != null) { + context = context.parent; + } + return context; + } /** * Returns index of the searcher/reader for document n in the @@ -175,4 +266,30 @@ public final class ReaderUtil { } return hi; } + + /** + * Returns index of the searcher/reader for document n in the + * array used to construct this searcher/reader. 
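    As a concrete illustration of the binary search below (the docBase values are invented for the example): given three leaves whose docBase values are 0, 10 and 25, subIndex(12, leaves) probes docBase=10 (12 > 10, move right), then docBase=25 (12 < 25, move back left) and returns 1, i.e. document 12 belongs to the second leaf, which covers documents 10 through 24.

        int leaf = ReaderUtil.subIndex(12, leaves);   // 1 for the docBases {0, 10, 25} assumed above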
+ */ + public static int subIndex(int n, AtomicReaderContext[] leaves) { // find + // searcher/reader for doc n: + int size = leaves.length; + int lo = 0; // search starts array + int hi = size - 1; // for first element less than n, return its index + while (hi >= lo) { + int mid = (lo + hi) >>> 1; + int midValue = leaves[mid].docBase; + if (n < midValue) + hi = mid - 1; + else if (n > midValue) + lo = mid + 1; + else { // found a match + while (mid + 1 < size && leaves[mid + 1].docBase == midValue) { + mid++; // scan to last match + } + return mid; + } + } + return hi; + } } diff --git a/lucene/src/java/org/apache/lucene/util/SetOnce.java b/lucene/src/java/org/apache/lucene/util/SetOnce.java index 3366b3cb918..f280ee78522 100644 --- a/lucene/src/java/org/apache/lucene/util/SetOnce.java +++ b/lucene/src/java/org/apache/lucene/util/SetOnce.java @@ -49,7 +49,7 @@ public final class SetOnce { } /** - * Creates a new instnace with the internal object set to the given object. + * Creates a new instance with the internal object set to the given object. * Note that any calls to {@link #set(Object)} afterwards will result in * {@link AlreadySetException} * diff --git a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java index 1be58152b76..b0e558c1c20 100644 --- a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java +++ b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java @@ -36,14 +36,14 @@ public abstract class SorterTemplate { protected abstract void swap(int i, int j); /** Compares slots {@code i} and {@code j} of you data. - * Should be implemented like valueOf(j).compareTo(valueOf(i)) */ + * Should be implemented like valueOf(i).compareTo(valueOf(j)) */ protected abstract int compare(int i, int j); /** Implement this method, that stores the value of slot {@code i} as pivot value */ protected abstract void setPivot(int i); /** Implements the compare function for the previously stored pivot value. - * Should be implemented like pivot.compareTo(valueOf(i)) */ + * Should be implemented like pivot.compareTo(valueOf(j)) */ protected abstract int comparePivot(int j); /** Sorts via stable in-place InsertionSort algorithm diff --git a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java index de611379b44..d4f0f229ba4 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java @@ -66,6 +66,13 @@ import org.apache.lucene.util.RamUsageEstimator; * assumed by the built-in automata operations. * *

    + *

    + * Note: This class has internal mutable state and is not thread safe. It is + * the caller's responsibility to ensure any necessary synchronization if you + * wish to use the same Automaton from multiple threads. In general it is instead + * recommended to use a {@link RunAutomaton} for multithreaded matching: it is immutable, + * thread safe, and much faster. + *

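    A sketch of that recommended pattern, assuming the RegExp and CharacterRunAutomaton helpers from the same package (the pattern string is arbitrary):

        Automaton a = new RegExp("luc.*ne").toAutomaton();       // mutable, confine to one thread while building
        // Compile once into an immutable, thread-safe matcher and share that across threads:
        CharacterRunAutomaton matcher = new CharacterRunAutomaton(a);
        boolean matches = matcher.run("lucene");                  // true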
    * @lucene.experimental */ public class Automaton implements Serializable, Cloneable { diff --git a/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java b/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java index ce1c19001b2..e7e9b301482 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java @@ -483,10 +483,12 @@ final public class BasicOperations { starts.count = 0; } + @Override public boolean equals(Object other) { return ((PointTransitions) other).point == point; } + @Override public int hashCode() { return point; } @@ -563,6 +565,7 @@ final public class BasicOperations { find(1+t.max).ends.add(t); } + @Override public String toString() { StringBuilder s = new StringBuilder(); for(int i=0;iRunAutomaton from a deterministic @@ -160,7 +152,6 @@ public abstract class RunAutomaton implements Serializable { } else { classmap = null; } - this.automaton = a; } /** diff --git a/lucene/src/java/org/apache/lucene/util/automaton/SortedIntSet.java b/lucene/src/java/org/apache/lucene/util/automaton/SortedIntSet.java index 1d143736db3..d1f3e28feb3 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/SortedIntSet.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/SortedIntSet.java @@ -159,10 +159,12 @@ final class SortedIntSet { return new FrozenIntSet(c, hashCode, state); } + @Override public int hashCode() { return hashCode; } + @Override public boolean equals(Object _other) { if (_other == null) { return false; @@ -186,6 +188,7 @@ final class SortedIntSet { return true; } + @Override public String toString() { StringBuilder sb = new StringBuilder().append('['); for(int i=0;i { } } - // Not private beacaus NodeHash needs access: + // Not private because NodeHash needs access: Arc readFirstRealArc(int address, Arc arc) throws IOException { final BytesReader in = getBytesReader(address); diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java index 64275bc55fd..fc8aa6691f3 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java @@ -55,6 +55,7 @@ public class PairOutputs extends Outputs> { } } + @Override public int hashCode() { return output1.hashCode() + output2.hashCode(); } diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/package.html b/lucene/src/java/org/apache/lucene/util/automaton/fst/package.html new file mode 100644 index 00000000000..c5be56e42fc --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/package.html @@ -0,0 +1,25 @@ + + + + + + + +Finite state transducers + + diff --git a/lucene/src/java/org/apache/lucene/util/packed/Packed32.java b/lucene/src/java/org/apache/lucene/util/packed/Packed32.java index c8bb011c209..ff22ad7ef48 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/Packed32.java +++ b/lucene/src/java/org/apache/lucene/util/packed/Packed32.java @@ -214,6 +214,7 @@ class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable { Arrays.fill(blocks, 0); } + @Override public String toString() { return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos + ", elements.length=" + blocks.length + ")"; diff --git a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java index 
691cec42a77..8428c9e36f2 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java +++ b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java @@ -199,6 +199,7 @@ class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable { | ((value << shifts[base + 2]) & writeMasks[base+2]); } + @Override public String toString() { return "Packed64(bitsPerValue=" + bitsPerValue + ", size=" + size() + ", maxPos=" + maxPos diff --git a/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java b/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java index 71d525d2b0b..c7f670c6eaa 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java +++ b/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java @@ -251,7 +251,7 @@ public class PackedInts { /** Returns how many bits are required to hold values up * to and including maxValue - * @param maxValue the maximum value tha should be representable. + * @param maxValue the maximum value that should be representable. * @return the amount of bits needed to represent values from 0 to maxValue. * @lucene.internal */ diff --git a/lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java b/lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java index 0cf054991ba..b2c86dd799f 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java +++ b/lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java @@ -106,6 +106,7 @@ class PackedWriter extends PackedInts.Writer { } } + @Override public String toString() { return "PackedWriter(written " + written + "/" + valueCount + " with " + bitsPerValue + " bits/value)"; diff --git a/lucene/src/java/org/apache/lucene/util/packed/package.html b/lucene/src/java/org/apache/lucene/util/packed/package.html index b98aa234276..d1d0e298ea1 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/package.html +++ b/lucene/src/java/org/apache/lucene/util/packed/package.html @@ -1,4 +1,20 @@ + diff --git a/lucene/src/java/overview.html b/lucene/src/java/overview.html index cf1da4fd65c..486da10133a 100644 --- a/lucene/src/java/overview.html +++ b/lucene/src/java/overview.html @@ -45,48 +45,36 @@ to check if the results are what we expect):

    -
    - - - - - - +
    +    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
     
    -
    - -    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    -
    -    // Store the index in memory:
    -    Directory directory = new RAMDirectory();
    -    // To store an index on disk, use this instead:
    -    //Directory directory = FSDirectory.open("/tmp/testindex");
    -    IndexWriter iwriter = new IndexWriter(directory, analyzer, true,
    -                                          new IndexWriter.MaxFieldLength(25000));
    -    Document doc = new Document();
    -    String text = "This is the text to be indexed.";
    -    doc.add(new Field("fieldname", text, Field.Store.YES,
    -        Field.Index.ANALYZED));
    -    iwriter.addDocument(doc);
    -    iwriter.close();
    -    
    -    // Now search the index:
    -    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    -    // Parse a simple query that searches for "text":
    -    QueryParser parser = new QueryParser("fieldname", analyzer);
    -    Query query = parser.parse("text");
    -    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    -    assertEquals(1, hits.length);
    -    // Iterate through the results:
    -    for (int i = 0; i < hits.length; i++) {
    -      Document hitDoc = isearcher.doc(hits[i].doc);
    -      assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    -    }
    -    isearcher.close();
    -    directory.close();
    - -
    -
    + // Store the index in memory: + Directory directory = new RAMDirectory(); + // To store an index on disk, use this instead: + //Directory directory = FSDirectory.open("/tmp/testindex"); + IndexWriter iwriter = new IndexWriter(directory, analyzer, true, + new IndexWriter.MaxFieldLength(25000)); + Document doc = new Document(); + String text = "This is the text to be indexed."; + doc.add(new Field("fieldname", text, Field.Store.YES, + Field.Index.ANALYZED)); + iwriter.addDocument(doc); + iwriter.close(); + + // Now search the index: + IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true + // Parse a simple query that searches for "text": + QueryParser parser = new QueryParser("fieldname", analyzer); + Query query = parser.parse("text"); + ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + // Iterate through the results: + for (int i = 0; i < hits.length; i++) { + Document hitDoc = isearcher.doc(hits[i].doc); + assertEquals("This is the text to be indexed.", hitDoc.get("fieldname")); + } + isearcher.close(); + directory.close(); diff --git a/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java similarity index 100% rename from lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java rename to lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java diff --git a/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java similarity index 100% rename from lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java rename to lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java diff --git a/lucene/src/test/org/apache/lucene/analysis/MockPayloadAnalyzer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java similarity index 100% rename from lucene/src/test/org/apache/lucene/analysis/MockPayloadAnalyzer.java rename to lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java diff --git a/lucene/src/test/org/apache/lucene/analysis/MockTokenFilter.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenFilter.java similarity index 100% rename from lucene/src/test/org/apache/lucene/analysis/MockTokenFilter.java rename to lucene/src/test-framework/org/apache/lucene/analysis/MockTokenFilter.java diff --git a/lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java similarity index 100% rename from lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java rename to lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java diff --git a/lucene/src/test/org/apache/lucene/index/DocHelper.java b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java similarity index 97% rename from lucene/src/test/org/apache/lucene/index/DocHelper.java rename to lucene/src/test-framework/org/apache/lucene/index/DocHelper.java index d6095f8a144..28bcdff4f7f 100644 --- a/lucene/src/test/org/apache/lucene/index/DocHelper.java +++ b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.Similarity; +import 
org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; @@ -220,7 +220,7 @@ class DocHelper { */ public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException { - return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc); + return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc); } /** @@ -233,9 +233,9 @@ class DocHelper { * @param doc * @throws IOException */ - public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException { + public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException { IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity)); + TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity)); //writer.setUseCompoundFile(false); writer.addDocument(doc); writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/MockIndexInput.java b/lucene/src/test-framework/org/apache/lucene/index/MockIndexInput.java similarity index 100% rename from lucene/src/test/org/apache/lucene/index/MockIndexInput.java rename to lucene/src/test-framework/org/apache/lucene/index/MockIndexInput.java diff --git a/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java new file mode 100644 index 00000000000..e8bc977931b --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java @@ -0,0 +1,95 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.util._TestUtil; + +public class MockRandomMergePolicy extends MergePolicy { + private final Random random; + + public MockRandomMergePolicy(Random random) { + // fork a private random, since we are called + // unpredictably from threads: + this.random = new Random(random.nextLong()); + } + + @Override + public MergeSpecification findMerges(SegmentInfos segmentInfos) { + MergeSpecification mergeSpec = null; + //System.out.println("MRMP: findMerges sis=" + segmentInfos); + + if (segmentInfos.size() > 1 && random.nextInt(5) == 3) { + + SegmentInfos segmentInfos2 = new SegmentInfos(); + segmentInfos2.addAll(segmentInfos); + Collections.shuffle(segmentInfos2, random); + + // TODO: sometimes make more than 1 merge? 
+ mergeSpec = new MergeSpecification(); + final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size()); + mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge))); + } + + return mergeSpec; + } + + @Override + public MergeSpecification findMergesForOptimize( + SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) + throws CorruptIndexException, IOException { + + //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos); + MergeSpecification mergeSpec = null; + if (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && segmentInfos.info(0).hasDeletions())) { + mergeSpec = new MergeSpecification(); + SegmentInfos segmentInfos2 = new SegmentInfos(); + segmentInfos2.addAll(segmentInfos); + Collections.shuffle(segmentInfos2, random); + int upto = 0; + while(upto < segmentInfos.size()) { + int max = Math.min(10, segmentInfos.size()-upto); + int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max); + mergeSpec.add(new OneMerge(segmentInfos2.range(upto, upto+inc))); + upto += inc; + } + } + return mergeSpec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes( + SegmentInfos segmentInfos) + throws CorruptIndexException, IOException { + return findMerges(segmentInfos); + } + + @Override + public void close() { + } + + @Override + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + // 80% of the time we create CFS: + return random.nextInt(5) != 1; + } +} diff --git a/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java similarity index 100% rename from lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java rename to lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java similarity index 85% rename from lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java index 3ff14c2ecd3..fc50b4a817b 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java @@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.standard.StandardCodec; @@ -126,7 +126,7 @@ public class MockFixedIntBlockCodec extends Codec { success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; 
} finally { @@ -164,15 +164,15 @@ public class MockFixedIntBlockCodec extends Codec { success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - postingsReader, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - StandardCodec.TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -189,14 +189,14 @@ public class MockFixedIntBlockCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) { SepPostingsReaderImpl.files(segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } @Override public void getExtensions(Set extensions) { SepPostingsWriterImpl.getExtensions(extensions); - PrefixCodedTermsReader.getExtensions(extensions); + BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); } } diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java similarity index 87% rename from lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java index 3894657b835..82b8615f433 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java @@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.standard.StandardCodec; @@ -150,7 +150,7 @@ public class MockVariableIntBlockCodec extends Codec { success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; } finally { @@ -189,15 +189,15 @@ public class MockVariableIntBlockCodec extends Codec { success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - postingsReader, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - StandardCodec.TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + 
state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -214,14 +214,14 @@ public class MockVariableIntBlockCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) { SepPostingsReaderImpl.files(segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } @Override public void getExtensions(Set extensions) { SepPostingsWriterImpl.getExtensions(extensions); - PrefixCodedTermsReader.getExtensions(extensions); + BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); } } diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java similarity index 85% rename from lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java index b12a78a1031..745c619cb87 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java @@ -26,6 +26,9 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -33,8 +36,7 @@ import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsWriterBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.TermStats; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; @@ -66,7 +68,7 @@ public class MockRandomCodec extends Codec { public MockRandomCodec(Random random) { name = "MockRandom"; - this.seedRandom = random; + this.seedRandom = new Random(random.nextLong()); } @Override @@ -108,11 +110,11 @@ public class MockRandomCodec extends Codec { } if (random.nextBoolean()) { - final int freqCutoff = _TestUtil.nextInt(random, 1, 20); + final int totTFCutoff = _TestUtil.nextInt(random, 1, 20); if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: pulsing postings with freqCutoff=" + freqCutoff); + System.out.println("MockRandomCodec: pulsing postings with totTFCutoff=" + totTFCutoff); } - postingsWriter = new PulsingPostingsWriterImpl(freqCutoff, postingsWriter); + postingsWriter = new PulsingPostingsWriterImpl(totTFCutoff, postingsWriter); } final TermsIndexWriterBase indexWriter; @@ -148,9 +150,13 @@ public class 
MockRandomCodec extends Codec { final Random rand = new Random(seed2); @Override - public boolean isIndexTerm(BytesRef term, int docFreq) { + public boolean isIndexTerm(BytesRef term, TermStats stats) { return random.nextInt(gap) == 17; } + + @Override + public void newField(FieldInfo fieldInfo) { + } }; } indexWriter = new VariableGapTermsIndexWriter(state, selector); @@ -164,7 +170,7 @@ public class MockRandomCodec extends Codec { success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; } finally { @@ -218,9 +224,9 @@ public class MockRandomCodec extends Codec { } if (random.nextBoolean()) { - final int freqCutoff = _TestUtil.nextInt(random, 1, 20); + final int totTFCutoff = _TestUtil.nextInt(random, 1, 20); if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: reading pulsing postings with freqCutoff=" + freqCutoff); + System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff); } postingsReader = new PulsingPostingsReaderImpl(postingsReader); } @@ -230,7 +236,11 @@ public class MockRandomCodec extends Codec { try { if (random.nextBoolean()) { - state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); + // if termsIndexDivisor is set to -1, we should not touch it. It means a + // test explicitly instructed not to load the terms index. + if (state.termsIndexDivisor != -1) { + state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); + } if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")"); } @@ -250,7 +260,9 @@ public class MockRandomCodec extends Codec { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")"); } - state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); + if (state.termsIndexDivisor != -1) { + state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); + } indexReader = new VariableGapTermsIndexReader(state.dir, state.fieldInfos, state.segmentInfo.name, @@ -268,15 +280,15 @@ public class MockRandomCodec extends Codec { success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - postingsReader, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - termsCacheSize, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + termsCacheSize, + state.codecId); success = true; return ret; } finally { @@ -296,7 +308,7 @@ public class MockRandomCodec extends Codec { files.add(seedFileName); SepPostingsReaderImpl.files(segmentInfo, codecId, files); StandardPostingsReader.files(dir, segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); @@ -314,7 +326,7 @@ public class MockRandomCodec extends Codec { @Override public void getExtensions(Set extensions) { SepPostingsWriterImpl.getExtensions(extensions); - 
PrefixCodedTermsReader.getExtensions(extensions); + BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); extensions.add(SEED_EXT); diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java similarity index 80% rename from lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java index a5d0b5b1828..e1e93587abe 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java @@ -30,8 +30,8 @@ import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsWriterBase; -import org.apache.lucene.index.codecs.PrefixCodedTermsReader; -import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import org.apache.lucene.index.codecs.standard.StandardCodec; @@ -70,7 +70,7 @@ public class MockSepCodec extends Codec { success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); success = true; return ret; } finally { @@ -108,15 +108,15 @@ public class MockSepCodec extends Codec { success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - postingsReader, - state.readBufferSize, - BytesRef.getUTF8SortedAsUnicodeComparator(), - StandardCodec.TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -133,7 +133,7 @@ public class MockSepCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) { SepPostingsReaderImpl.files(segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } @@ -144,7 +144,7 @@ public class MockSepCodec extends Codec { public static void getSepExtensions(Set extensions) { SepPostingsWriterImpl.getExtensions(extensions); - PrefixCodedTermsReader.getExtensions(extensions); + BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); } -} \ No newline at end of file +} diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntFactory.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntFactory.java similarity index 100% rename from 
lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntFactory.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntFactory.java diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java similarity index 94% rename from lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java index a476033a1ac..031794dd3ca 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java @@ -19,10 +19,11 @@ package org.apache.lucene.index.codecs.mocksep; import java.io.IOException; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.CodecUtil; -import org.apache.lucene.index.codecs.sep.IntIndexInput; /** Reads IndexInputs written with {@link * SingleIntIndexOutput}. NOTE: this class is just for @@ -63,6 +64,7 @@ public class MockSingleIntIndexInput extends IntIndexInput { /** Reads next single int */ @Override public int next() throws IOException { + //System.out.println("msii.next() fp=" + in.getFilePointer() + " vs " + in.length()); return in.readVInt(); } } @@ -71,7 +73,7 @@ public class MockSingleIntIndexInput extends IntIndexInput { private long fp; @Override - public void read(IndexInput indexIn, boolean absolute) + public void read(DataInput indexIn, boolean absolute) throws IOException { if (absolute) { fp = indexIn.readVLong(); diff --git a/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java similarity index 100% rename from lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java new file mode 100644 index 00000000000..00b6e01cd62 --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java @@ -0,0 +1,209 @@ +package org.apache.lucene.index.codecs.preflexrw; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.TermsConsumer; +import org.apache.lucene.index.codecs.PostingsConsumer; +import org.apache.lucene.index.codecs.TermStats; +import org.apache.lucene.index.codecs.standard.DefaultSkipListWriter; +import org.apache.lucene.index.codecs.preflex.PreFlexCodec; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.codecs.preflex.TermInfo; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.Comparator; + +class PreFlexFieldsWriter extends FieldsConsumer { + + private final TermInfosWriter termsOut; + private final IndexOutput freqOut; + private final IndexOutput proxOut; + private final DefaultSkipListWriter skipListWriter; + private final int totalNumDocs; + + public PreFlexFieldsWriter(SegmentWriteState state) throws IOException { + termsOut = new TermInfosWriter(state.directory, + state.segmentName, + state.fieldInfos, + state.termIndexInterval); + + final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION); + freqOut = state.directory.createOutput(freqFile); + totalNumDocs = state.numDocs; + + if (state.fieldInfos.hasProx()) { + final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION); + proxOut = state.directory.createOutput(proxFile); + } else { + proxOut = null; + } + + skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, + termsOut.maxSkipLevels, + totalNumDocs, + freqOut, + proxOut); + //System.out.println("\nw start seg=" + segment); + } + + @Override + public TermsConsumer addField(FieldInfo field) throws IOException { + assert field.number != -1; + //System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number); + return new PreFlexTermsWriter(field); + } + + @Override + public void close() throws IOException { + termsOut.close(); + freqOut.close(); + if (proxOut != null) { + proxOut.close(); + } + } + + private class PreFlexTermsWriter extends TermsConsumer { + private final FieldInfo fieldInfo; + private final boolean omitTF; + private final boolean storePayloads; + + private final TermInfo termInfo = new TermInfo(); + private final PostingsWriter postingsWriter = new PostingsWriter(); + + public PreFlexTermsWriter(FieldInfo fieldInfo) { + this.fieldInfo = fieldInfo; + omitTF = fieldInfo.omitTermFreqAndPositions; + storePayloads = fieldInfo.storePayloads; + } + + private class PostingsWriter extends PostingsConsumer { + private int lastDocID; + private int lastPayloadLength = -1; + private int lastPosition; + private int df; + + public PostingsWriter reset() { + df = 0; + lastDocID = 0; + lastPayloadLength = -1; + return this; + } + + @Override + public void startDoc(int docID, int termDocFreq) throws IOException { + //System.out.println(" w doc=" + docID); + + final int delta = docID - lastDocID; + if (docID < 0 || (df > 0 && delta <= 0)) { + throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )"); + } + + if ((++df % termsOut.skipInterval) == 0) { + skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); + skipListWriter.bufferSkip(df); + } + + lastDocID = docID; + + assert docID < totalNumDocs: "docID=" + docID + " 
totalNumDocs=" + totalNumDocs; + + if (omitTF) { + freqOut.writeVInt(delta); + } else { + final int code = delta << 1; + if (termDocFreq == 1) { + freqOut.writeVInt(code|1); + } else { + freqOut.writeVInt(code); + freqOut.writeVInt(termDocFreq); + } + } + lastPosition = 0; + } + + @Override + public void addPosition(int position, BytesRef payload) throws IOException { + assert proxOut != null; + + //System.out.println(" w pos=" + position + " payl=" + payload); + final int delta = position - lastPosition; + lastPosition = position; + + if (storePayloads) { + final int payloadLength = payload == null ? 0 : payload.length; + if (payloadLength != lastPayloadLength) { + //System.out.println(" write payload len=" + payloadLength); + lastPayloadLength = payloadLength; + proxOut.writeVInt((delta<<1)|1); + proxOut.writeVInt(payloadLength); + } else { + proxOut.writeVInt(delta << 1); + } + if (payloadLength > 0) { + proxOut.writeBytes(payload.bytes, payload.offset, payload.length); + } + } else { + proxOut.writeVInt(delta); + } + } + + @Override + public void finishDoc() throws IOException { + } + } + + @Override + public PostingsConsumer startTerm(BytesRef text) throws IOException { + //System.out.println(" w term=" + text.utf8ToString()); + skipListWriter.resetSkip(); + termInfo.freqPointer = freqOut.getFilePointer(); + if (proxOut != null) { + termInfo.proxPointer = proxOut.getFilePointer(); + } + return postingsWriter.reset(); + } + + @Override + public void finishTerm(BytesRef text, TermStats stats) throws IOException { + if (stats.docFreq > 0) { + long skipPointer = skipListWriter.writeSkip(freqOut); + termInfo.docFreq = stats.docFreq; + termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer); + //System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number); + termsOut.add(fieldInfo.number, + text, + termInfo); + } + } + + @Override + public void finish(long sumTotalTermCount) throws IOException { + } + + @Override + public Comparator getComparator() throws IOException { + return BytesRef.getUTF8SortedAsUTF16Comparator(); + } + } +} \ No newline at end of file diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java similarity index 100% rename from lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java rename to lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexRWCodec.java diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java new file mode 100644 index 00000000000..782cd3a2a01 --- /dev/null +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java @@ -0,0 +1,227 @@ +package org.apache.lucene.index.codecs.preflexrw; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.io.IOException; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.codecs.preflex.TermInfo; + + +/** This stores a monotonically increasing set of pairs in a + Directory. A TermInfos can be written once, in order. */ + +final class TermInfosWriter { + /** The file format version, a negative number. */ + public static final int FORMAT = -3; + + // Changed strings to true utf8 with length-in-bytes not + // length-in-chars + public static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; + + // NOTE: always change this if you switch to a new format! + public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; + + private FieldInfos fieldInfos; + private IndexOutput output; + private TermInfo lastTi = new TermInfo(); + private long size; + + // TODO: the default values for these two parameters should be settable from + // IndexWriter. However, once that's done, folks will start setting them to + // ridiculous values and complaining that things don't work well, as with + // mergeFactor. So, let's wait until a number of folks find that alternate + // values work better. Note that both of these values are stored in the + // segment, so that it's safe to change these w/o rebuilding all indexes. + + /** Expert: The fraction of terms in the "dictionary" which should be stored + * in RAM. Smaller values use more memory, but make searching slightly + * faster, while larger values use less memory and make searching slightly + * slower. Searching is typically not dominated by dictionary lookup, so + * tweaking this is rarely useful.*/ + int indexInterval = 128; + + /** Expert: The fraction of {@link TermDocs} entries stored in skip tables, + * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in + * smaller indexes, greater acceleration, but fewer accelerable cases, while + * smaller values result in bigger indexes, less acceleration and more + * accelerable cases. More detailed experiments would be useful here. */ + int skipInterval = 16; + + /** Expert: The maximum number of skip levels. Smaller values result in + * slightly smaller indexes, but slower skipping in big posting lists. 
+ */ + int maxSkipLevels = 10; + + private long lastIndexPointer; + private boolean isIndex; + private final BytesRef lastTerm = new BytesRef(); + private int lastFieldNumber = -1; + + private TermInfosWriter other; + + TermInfosWriter(Directory directory, String segment, FieldInfos fis, + int interval) + throws IOException { + initialize(directory, segment, fis, interval, false); + other = new TermInfosWriter(directory, segment, fis, interval, true); + other.other = this; + } + + private TermInfosWriter(Directory directory, String segment, FieldInfos fis, + int interval, boolean isIndex) throws IOException { + initialize(directory, segment, fis, interval, isIndex); + } + + private void initialize(Directory directory, String segment, FieldInfos fis, + int interval, boolean isi) throws IOException { + indexInterval = interval; + fieldInfos = fis; + isIndex = isi; + output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis")); + output.writeInt(FORMAT_CURRENT); // write format + output.writeLong(0); // leave space for size + output.writeInt(indexInterval); // write indexInterval + output.writeInt(skipInterval); // write skipInterval + output.writeInt(maxSkipLevels); // write maxSkipLevels + assert initUTF16Results(); + } + + // Currently used only by assert statements + UnicodeUtil.UTF16Result utf16Result1; + UnicodeUtil.UTF16Result utf16Result2; + private final BytesRef scratchBytes = new BytesRef(); + + // Currently used only by assert statements + private boolean initUTF16Results() { + utf16Result1 = new UnicodeUtil.UTF16Result(); + utf16Result2 = new UnicodeUtil.UTF16Result(); + return true; + } + + // Currently used only by assert statement + private int compareToLastTerm(int fieldNumber, BytesRef term) { + + if (lastFieldNumber != fieldNumber) { + final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber)); + // If there is a field named "" (empty string) then we + // will get 0 on this comparison, yet, it's "OK". But + // it's not OK if two different field numbers map to + // the same name. + if (cmp != 0 || lastFieldNumber != -1) + return cmp; + } + + scratchBytes.copy(term); + assert lastTerm.offset == 0; + UnicodeUtil.UTF8toUTF16(lastTerm.bytes, 0, lastTerm.length, utf16Result1); + + assert scratchBytes.offset == 0; + UnicodeUtil.UTF8toUTF16(scratchBytes.bytes, 0, scratchBytes.length, utf16Result2); + + final int len; + if (utf16Result1.length < utf16Result2.length) + len = utf16Result1.length; + else + len = utf16Result2.length; + + for(int i=0;i, TermInfo> pair to the set. + Term must be lexicographically greater than all previous Terms added. 
+ TermInfo pointers must be positive and greater than all previous.*/ + public void add(int fieldNumber, BytesRef term, TermInfo ti) + throws IOException { + + assert compareToLastTerm(fieldNumber, term) < 0 || + (isIndex && term.length == 0 && lastTerm.length == 0) : + "Terms are out of order: field=" + fieldInfos.fieldName(fieldNumber) + " (number " + fieldNumber + ")" + + " lastField=" + fieldInfos.fieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + + " text=" + term.utf8ToString() + " lastText=" + lastTerm.utf8ToString(); + + assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"; + assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"; + + if (!isIndex && size % indexInterval == 0) + other.add(lastFieldNumber, lastTerm, lastTi); // add an index term + + writeTerm(fieldNumber, term); // write term + + output.writeVInt(ti.docFreq); // write doc freq + output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers + output.writeVLong(ti.proxPointer - lastTi.proxPointer); + + if (ti.docFreq >= skipInterval) { + output.writeVInt(ti.skipOffset); + } + + if (isIndex) { + output.writeVLong(other.output.getFilePointer() - lastIndexPointer); + lastIndexPointer = other.output.getFilePointer(); // write pointer + } + + lastFieldNumber = fieldNumber; + lastTi.set(ti); + size++; + } + + private void writeTerm(int fieldNumber, BytesRef term) + throws IOException { + + //System.out.println(" tiw.write field=" + fieldNumber + " term=" + term.utf8ToString()); + + // TODO: UTF16toUTF8 could tell us this prefix + // Compute prefix in common with last term: + int start = 0; + final int limit = term.length < lastTerm.length ? term.length : lastTerm.length; + while(start < limit) { + if (term.bytes[start+term.offset] != lastTerm.bytes[start+lastTerm.offset]) + break; + start++; + } + + final int length = term.length - start; + output.writeVInt(start); // write shared prefix length + output.writeVInt(length); // write delta length + output.writeBytes(term.bytes, start+term.offset, length); // write delta bytes + output.writeVInt(fieldNumber); // write field num + lastTerm.copy(term); + } + + /** Called to complete TermInfos creation. 
*/ + void close() throws IOException { + output.seek(4); // write size after format + output.writeLong(size); + output.close(); + + if (!isIndex) + other.close(); + } + +} diff --git a/lucene/src/test/org/apache/lucene/search/CheckHits.java b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java similarity index 97% rename from lucene/src/test/org/apache/lucene/search/CheckHits.java rename to lucene/src/test-framework/org/apache/lucene/search/CheckHits.java index dedd91949b6..6846e59deca 100644 --- a/lucene/src/test/org/apache/lucene/search/CheckHits.java +++ b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java @@ -25,6 +25,7 @@ import java.util.Random; import junit.framework.Assert; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.store.Directory; public class CheckHits { @@ -99,11 +100,13 @@ public class CheckHits { for (int i = -1; i < 2; i++) { actual.clear(); - QueryUtils.wrapUnderlyingReader - (random, searcher, i).search(query, c); + IndexSearcher s = QueryUtils.wrapUnderlyingReader + (random, searcher, i); + s.search(query, c); Assert.assertEquals("Wrap Reader " + i + ": " + query.toString(defaultFieldName), correct, actual); + s.close(); } } @@ -120,8 +123,8 @@ public class CheckHits { bag.add(Integer.valueOf(doc + base)); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { @@ -483,8 +486,8 @@ public class CheckHits { verifyExplanation(d,doc,scorer.score(),deep,exp); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { diff --git a/lucene/src/test/org/apache/lucene/search/QueryUtils.java b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java similarity index 83% rename from lucene/src/test/org/apache/lucene/search/QueryUtils.java rename to lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java index c2c8b17fc52..e84b2f9a8b9 100644 --- a/lucene/src/test/org/apache/lucene/search/QueryUtils.java +++ b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java @@ -6,18 +6,23 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.util.Random; +import java.lang.reflect.Method; import junit.framework.Assert; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; @@ -110,9 +115,13 @@ public class QueryUtils { checkFirstSkipTo(q1,s); checkSkipTo(q1,s); if (wrap) { - check(random, q1, wrapUnderlyingReader(random, s, -1), false); - check(random, q1, wrapUnderlyingReader(random, s, 0), false); - check(random, q1, 
wrapUnderlyingReader(random, s, +1), false); + IndexSearcher wrapped; + check(random, q1, wrapped = wrapUnderlyingReader(random, s, -1), false); + wrapped.close(); + check(random, q1, wrapped = wrapUnderlyingReader(random, s, 0), false); + wrapped.close(); + check(random, q1, wrapped = wrapUnderlyingReader(random, s, +1), false); + wrapped.close(); } checkExplanations(q1,s); checkSerialization(q1,s); @@ -154,8 +163,8 @@ public class QueryUtils { IndexReader.open(makeEmptyIndex(random, 0), true), 0 < edge ? r : IndexReader.open(makeEmptyIndex(random, 0), true)) }; - IndexSearcher out = new IndexSearcher(new MultiReader(readers)); - out.setSimilarity(s.getSimilarity()); + IndexSearcher out = LuceneTestCase.newSearcher(new MultiReader(readers)); + out.setSimilarityProvider(s.getSimilarityProvider()); return out; } @@ -169,6 +178,16 @@ public class QueryUtils { } w.commit(); w.deleteDocuments( new MatchAllDocsQuery() ); + try { + // Carefully invoke what is a package-private (test + // only, internal) method on IndexWriter: + Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments"); + m.setAccessible(true); + m.invoke(w); + } catch (Exception e) { + // Should not happen? + throw new RuntimeException(e); + } w.commit(); if (0 < numDeletedDocs) @@ -211,13 +230,12 @@ public class QueryUtils { } } - /** alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc * and ensure a hitcollector receives same docs and scores */ public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException { //System.out.println("Checking "+q); - + final AtomicReaderContext[] readerContextArray = ReaderUtil.leaves(s.getTopReaderContext()); if (q.weight(s).scoresDocsOutOfOrder()) return; // in this case order of skipTo() might differ from that of next(). 
final int skip_op = 0; @@ -247,8 +265,8 @@ public class QueryUtils { s.search(q, new Collector() { private Scorer sc; - private IndexReader reader; private Scorer scorer; + private int leafPtr; @Override public void setScorer(Scorer scorer) throws IOException { @@ -262,7 +280,7 @@ public class QueryUtils { try { if (scorer == null) { Weight w = q.weight(s); - scorer = w.scorer(reader, true, false); + scorer = w.scorer(readerContextArray[leafPtr], ScorerContext.def()); } int op = order[(opidx[0]++) % order.length]; @@ -300,19 +318,23 @@ public class QueryUtils { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS if (lastReader[0] != null) { final IndexReader previousReader = lastReader[0]; - Weight w = q.weight(new IndexSearcher(previousReader)); - Scorer scorer = w.scorer(previousReader, true, false); + IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); + Weight w = q.weight(indexSearcher); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); } + leafPtr++; + indexSearcher.close(); } - this.reader = lastReader[0] = reader; + lastReader[0] = context.reader; + assert readerContextArray[leafPtr].reader == context.reader; this.scorer = null; lastDoc[0] = -1; } @@ -327,12 +349,14 @@ public class QueryUtils { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS final IndexReader previousReader = lastReader[0]; - Weight w = q.weight(new IndexSearcher(previousReader)); - Scorer scorer = w.scorer(previousReader, true, false); + IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); + Weight w = q.weight(indexSearcher); + Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); } + indexSearcher.close(); } } } @@ -343,10 +367,10 @@ public class QueryUtils { final float maxDiff = 1e-3f; final int lastDoc[] = {-1}; final IndexReader lastReader[] = {null}; - + final AtomicReaderContext[] context = ReaderUtil.leaves(s.getTopReaderContext()); s.search(q,new Collector() { private Scorer scorer; - private IndexReader reader; + private int leafPtr; @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; @@ -358,7 +382,7 @@ public class QueryUtils { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = q.weight(s); - Scorer scorer = w.scorer(reader, true, false); + Scorer scorer = w.scorer(context[leafPtr], ScorerContext.def()); Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID()); float skipToScore = scorer.score(); @@ -378,20 +402,23 @@ public class QueryUtils { } @Override - public void setNextReader(IndexReader reader, int 
docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS if (lastReader[0] != null) { final IndexReader previousReader = lastReader[0]; - Weight w = q.weight(new IndexSearcher(previousReader)); - Scorer scorer = w.scorer(previousReader, true, false); + IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); + Weight w = q.weight(indexSearcher); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); } + indexSearcher.close(); + leafPtr++; } - this.reader = lastReader[0] = reader; + lastReader[0] = context.reader; lastDoc[0] = -1; } @Override @@ -404,12 +431,14 @@ public class QueryUtils { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS final IndexReader previousReader = lastReader[0]; - Weight w = q.weight(new IndexSearcher(previousReader)); - Scorer scorer = w.scorer(previousReader, true, false); + IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); + Weight w = q.weight(indexSearcher); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); } + indexSearcher.close(); } } } diff --git a/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java similarity index 97% rename from lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java rename to lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java index 13587132b9d..bb9552bf7d9 100644 --- a/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java +++ b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java @@ -31,7 +31,9 @@ import java.util.Map; import java.util.Random; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * This is a Directory Wrapper that adds methods @@ -48,6 +50,7 @@ public class MockDirectoryWrapper extends Directory { Random randomState; boolean noDeleteOpenFile = true; boolean preventDoubleWrite = true; + boolean checkIndexOnClose = true; boolean trackDiskUsage = false; private Set unSyncedFiles; private Set createdFiles; @@ -205,6 +208,17 @@ public class MockDirectoryWrapper extends Directory { return noDeleteOpenFile; } + /** + * Set whether or not checkindex should be run + * on close + */ + public void setCheckIndexOnClose(boolean value) { + this.checkIndexOnClose = value; + } + + public boolean getCheckIndexOnClose() { + return checkIndexOnClose; + } /** * If 0.0, no exceptions will be thrown. Else this should * be a double 0.0 - 1.0. 
We will randomly throw an @@ -393,6 +407,9 @@ public class MockDirectoryWrapper extends Directory { throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause); } open = false; + if (checkIndexOnClose && IndexReader.indexExists(this)) { + _TestUtil.checkIndex(this); + } delegate.close(); } diff --git a/lucene/src/test/org/apache/lucene/store/MockIndexInputWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java similarity index 100% rename from lucene/src/test/org/apache/lucene/store/MockIndexInputWrapper.java rename to lucene/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java diff --git a/lucene/src/test/org/apache/lucene/store/MockIndexOutputWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockIndexOutputWrapper.java similarity index 100% rename from lucene/src/test/org/apache/lucene/store/MockIndexOutputWrapper.java rename to lucene/src/test-framework/org/apache/lucene/store/MockIndexOutputWrapper.java diff --git a/lucene/src/test/org/apache/lucene/store/_TestHelper.java b/lucene/src/test-framework/org/apache/lucene/store/_TestHelper.java similarity index 100% rename from lucene/src/test/org/apache/lucene/store/_TestHelper.java rename to lucene/src/test-framework/org/apache/lucene/store/_TestHelper.java diff --git a/lucene/src/test/org/apache/lucene/util/LineFileDocs.java b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java similarity index 75% rename from lucene/src/test/org/apache/lucene/util/LineFileDocs.java rename to lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java index fe3c0eb355a..56cb3e089d3 100644 --- a/lucene/src/test/org/apache/lucene/util/LineFileDocs.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; */ import java.io.Closeable; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.BufferedReader; @@ -26,6 +27,7 @@ import java.io.InputStream; import java.io.BufferedInputStream; import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.GZIPInputStream; +import java.util.Random; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -36,21 +38,19 @@ import org.apache.lucene.document.Field; public class LineFileDocs implements Closeable { private BufferedReader reader; - private final boolean forever; private final static int BUFFER_SIZE = 1 << 16; // 64K private final AtomicInteger id = new AtomicInteger(); private final String path; // If forever is true, we rewind the file at EOF (repeat // the docs over and over) - public LineFileDocs(String path, boolean forever) throws IOException { + public LineFileDocs(Random random, String path) throws IOException { this.path = path; - this.forever = forever; - open(); + open(random); } - public LineFileDocs(boolean forever) throws IOException { - this(LuceneTestCase.TEST_LINE_DOCS_FILE, forever); + public LineFileDocs(Random random) throws IOException { + this(random, LuceneTestCase.TEST_LINE_DOCS_FILE); } public synchronized void close() throws IOException { @@ -60,22 +60,49 @@ public class LineFileDocs implements Closeable { } } - private synchronized void open() throws IOException { + private synchronized void open(Random random) throws IOException { InputStream is = getClass().getResourceAsStream(path); if (is == null) { // if its not in classpath, we load it as absolute filesystem path (e.g. 
Hudson's home dir) is = new FileInputStream(path); } + File file = new File(path); + long size; + if (file.exists()) { + size = file.length(); + } else { + size = is.available(); + } if (path.endsWith(".gz")) { is = new GZIPInputStream(is); + // guestimate: + size *= 2.8; } + final InputStream in = new BufferedInputStream(is, BUFFER_SIZE); reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE); + + // Override sizes for currently "known" line files: + if (path.equals("europarl.lines.txt.gz")) { + size = 15129506L; + } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) { + size = 3038178822L; + } + + // Randomly seek to starting point: + if (random != null && size > 3) { + final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3); + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open"); + } + reader.skip(seekTo); + reader.readLine(); + } } - public synchronized void reset() throws IOException { + public synchronized void reset(Random random) throws IOException { close(); - open(); + open(random); id.set(0); } @@ -117,15 +144,13 @@ public class LineFileDocs implements Closeable { synchronized(this) { line = reader.readLine(); if (line == null) { - if (forever) { - if (LuceneTestCase.VERBOSE) { - System.out.println("TEST: LineFileDocs: now rewind file..."); - } - close(); - open(); - line = reader.readLine(); + // Always rewind at end: + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: LineFileDocs: now rewind file..."); } - return null; + close(); + open(null); + line = reader.readLine(); } } diff --git a/lucene/src/test/org/apache/lucene/util/LuceneJUnitDividingSelector.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java similarity index 98% rename from lucene/src/test/org/apache/lucene/util/LuceneJUnitDividingSelector.java rename to lucene/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java index cf27a7267fc..5a9509c5a82 100644 --- a/lucene/src/test/org/apache/lucene/util/LuceneJUnitDividingSelector.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java @@ -30,6 +30,7 @@ public class LuceneJUnitDividingSelector extends BaseExtendSelector { /** Current part to accept. 
*/ private int part; + @Override public void setParameters(Parameter[] pParameters) { super.setParameters(pParameters); for (int j = 0; j < pParameters.length; j++) { @@ -46,6 +47,7 @@ public class LuceneJUnitDividingSelector extends BaseExtendSelector { } } + @Override public void verifySettings() { super.verifySettings(); if (divisor <= 0 || part <= 0) { @@ -56,6 +58,7 @@ public class LuceneJUnitDividingSelector extends BaseExtendSelector { } } + @Override public boolean isSelected(File dir, String name, File path) { counter = counter % divisor + 1; return counter == part; diff --git a/lucene/src/test/org/apache/lucene/util/LuceneJUnitResultFormatter.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneJUnitResultFormatter.java similarity index 100% rename from lucene/src/test/org/apache/lucene/util/LuceneJUnitResultFormatter.java rename to lucene/src/test-framework/org/apache/lucene/util/LuceneJUnitResultFormatter.java diff --git a/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java similarity index 93% rename from lucene/src/test/org/apache/lucene/util/LuceneTestCase.java rename to lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java index f133f6ad0c4..a10689c98be 100644 --- a/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java @@ -28,6 +28,9 @@ import java.lang.reflect.Constructor; import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -51,6 +54,7 @@ import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache.CacheEntry; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockFactory; @@ -390,6 +394,16 @@ public abstract class LuceneTestCase extends Assert { if (testsFailed) { System.err.println("NOTE: all tests run in this JVM:"); System.err.println(Arrays.toString(testClassesRun.toArray())); + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + + System.getProperty("os.arch") + "/" + + System.getProperty("java.vendor") + " " + + System.getProperty("java.version") + " " + + (Constants.JRE_IS_64BIT ? 
"(64-bit)" : "(32-bit)") + "/" + + "cpus=" + Runtime.getRuntime().availableProcessors() + "," + + "threads=" + Thread.activeCount() + "," + + "free=" + Runtime.getRuntime().freeMemory() + "," + + "total=" + Runtime.getRuntime().totalMemory()); } } @@ -554,14 +568,21 @@ public abstract class LuceneTestCase extends Assert { if (t.isAlive() && !rogueThreads.containsKey(t) && - t != Thread.currentThread()) { + t != Thread.currentThread() && + /* its ok to keep your searcher across test cases */ + (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { System.err.println("WARNING: " + context + " left thread running: " + t); rogueThreads.put(t, true); rogueCount++; - // wait on the thread to die of natural causes - try { - t.join(THREAD_STOP_GRACE_MSEC); - } catch (InterruptedException e) { e.printStackTrace(); } + if (t.getName().startsWith("LuceneTestCase")) { + System.err.println("PLEASE CLOSE YOUR INDEXSEARCHERS IN YOUR TEST!!!!"); + continue; + } else { + // wait on the thread to die of natural causes + try { + t.join(THREAD_STOP_GRACE_MSEC); + } catch (InterruptedException e) { e.printStackTrace(); } + } // try to stop the thread: t.setUncaughtExceptionHandler(null); Thread.setDefaultUncaughtExceptionHandler(null); @@ -741,7 +762,11 @@ public abstract class LuceneTestCase extends Assert { c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); } - c.setMergePolicy(newLogMergePolicy(r)); + if (r.nextBoolean()) { + c.setMergePolicy(new MockRandomMergePolicy(r)); + } else { + c.setMergePolicy(newLogMergePolicy()); + } c.setReaderPooling(r.nextBoolean()); c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4)); @@ -764,6 +789,19 @@ public abstract class LuceneTestCase extends Assert { return logmp; } + public static LogMergePolicy newInOrderLogMergePolicy() { + LogMergePolicy logmp = newLogMergePolicy(); + logmp.setRequireContiguousMerge(true); + return logmp; + } + + public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) { + LogMergePolicy logmp = newLogMergePolicy(); + logmp.setMergeFactor(mergeFactor); + logmp.setRequireContiguousMerge(true); + return logmp; + } + public static LogMergePolicy newLogMergePolicy(boolean useCFS) { LogMergePolicy logmp = newLogMergePolicy(); logmp.setUseCompoundFile(useCFS); @@ -991,6 +1029,34 @@ public abstract class LuceneTestCase extends Assert { } } + /** create a new searcher over the reader */ + public static IndexSearcher newSearcher(IndexReader r) throws IOException { + if (random.nextBoolean()) { + return new IndexSearcher(r); + } else { + int threads = 0; + final ExecutorService ex = (random.nextBoolean()) ? 
null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + new NamedThreadFactory("LuceneTestCase")); + if (ex != null && VERBOSE) { + System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); + } + return new IndexSearcher(r.getTopReaderContext(), ex) { + @Override + public void close() throws IOException { + super.close(); + if (ex != null) { + ex.shutdown(); + try { + ex.awaitTermination(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + }; + } + } public String getName() { return this.name; @@ -1107,8 +1173,15 @@ public abstract class LuceneTestCase extends Assert { @Override protected void runChild(FrameworkMethod arg0, RunNotifier arg1) { - for (int i = 0; i < TEST_ITER; i++) + if (VERBOSE) { + System.out.println("\nNOTE: running test " + arg0.getName()); + } + for (int i = 0; i < TEST_ITER; i++) { + if (VERBOSE && TEST_ITER > 1) { + System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER); + } super.runChild(arg0, arg1); + } } public LuceneTestCaseRunner(Class clazz) throws InitializationError { diff --git a/lucene/src/test/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java similarity index 83% rename from lucene/src/test/org/apache/lucene/util/_TestUtil.java rename to lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java index 64ed7e64bba..ad34c887147 100644 --- a/lucene/src/test/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -17,11 +17,22 @@ package org.apache.lucene.util; * limitations under the License. */ +import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.PrintStream; +import java.util.Enumeration; import java.util.Random; +import java.util.Map; +import java.util.HashMap; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import org.junit.Assert; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; @@ -57,6 +68,49 @@ public class _TestUtil { } } + /** + * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first + */ + public static void unzip(File zipName, File destDir) throws IOException { + + ZipFile zipFile = new ZipFile(zipName); + + Enumeration entries = zipFile.entries(); + + rmDir(destDir); + + destDir.mkdir(); + + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + + InputStream in = zipFile.getInputStream(entry); + File targetFile = new File(destDir, entry.getName()); + if (entry.isDirectory()) { + // allow unzipping with directory structure + targetFile.mkdirs(); + } else { + if (targetFile.getParentFile()!=null) { + // be on the safe side: do not rely on that directories are always extracted + // before their children (although this makes sense, but is it guaranteed?) 
+ targetFile.getParentFile().mkdirs(); + } + OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile)); + + byte[] buffer = new byte[8192]; + int len; + while((len = in.read(buffer)) >= 0) { + out.write(buffer, 0, len); + } + + in.close(); + out.close(); + } + } + + zipFile.close(); + } + public static void syncConcurrentMerges(IndexWriter writer) { syncConcurrentMerges(writer.getConfig().getMergeScheduler()); } @@ -124,37 +178,6 @@ public class _TestUtil { } return new String(buffer, 0, end); } - - public static String randomUnicodeString(Random r, int minLength, int maxLength) { - if(minLength > maxLength) - throw new IllegalArgumentException("minLength must be >= maxLength"); - final boolean lenEqual = minLength==maxLength; - final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1); - if (end == 0) { - // allow 0 length - return ""; - } - - // TODO(simonw): check this - final int fixedPlane = 5;//minLength % 5; - final char[] buffer = new char[end]; - for (int i = 0; i < end; i++) { - int t = lenEqual? fixedPlane: r.nextInt(5); - //buffer[i] = (char) (97 + r.nextInt(26)); - if (0 == t && i < end - 1 && !lenEqual) { - // Make a surrogate pair - // High surrogate - buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff); - // Low surrogate - buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff); - } - else if (t <= 1) buffer[i] = (char) r.nextInt(0x80); - else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); - else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); - else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); - } - return new String(buffer, 0, end); - } private static final int[] blockStarts = { 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, @@ -222,6 +245,37 @@ public class _TestUtil { sb.appendCodePoint(nextInt(r, blockStarts[block], blockEnds[block])); return sb.toString(); } + + public static String randomUnicodeString(Random r, int minLength, int maxLength) { + if(minLength > maxLength) + throw new IllegalArgumentException("minLength must be >= maxLength"); + final boolean lenEqual = minLength==maxLength; + final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1); + if (end == 0) { + // allow 0 length + return ""; + } + + // TODO(simonw): check this + final int fixedPlane = 5;//minLength % 5; + final char[] buffer = new char[end]; + for (int i = 0; i < end; i++) { + int t = lenEqual? 
fixedPlane: r.nextInt(5); + //buffer[i] = (char) (97 + r.nextInt(26)); + if (0 == t && i < end - 1 && !lenEqual) { + // Make a surrogate pair + // High surrogate + buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff); + // Low surrogate + buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff); + } + else if (t <= 1) buffer[i] = (char) r.nextInt(0x80); + else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); + else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); + else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); + } + return new String(buffer, 0, end); + } public static CodecProvider alwaysCodec(final Codec c) { CodecProvider p = new CodecProvider() { @@ -269,4 +323,17 @@ public class _TestUtil { ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); } } + + /** Checks some basic behaviour of an AttributeImpl + * @param reflectedValues contains a map with "AttributeClass#key" as values + */ + public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) { + final Map map = new HashMap(); + att.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + map.put(attClass.getName() + '#' + key, value); + } + }); + Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); + } } diff --git a/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java similarity index 100% rename from lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java rename to lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java diff --git a/lucene/src/test/org/apache/lucene/TestAssertions.java b/lucene/src/test/org/apache/lucene/TestAssertions.java index 373fd3db271..ce51fd34484 100644 --- a/lucene/src/test/org/apache/lucene/TestAssertions.java +++ b/lucene/src/test/org/apache/lucene/TestAssertions.java @@ -35,34 +35,45 @@ public class TestAssertions extends LuceneTestCase { } static class TestAnalyzer1 extends Analyzer { + @Override public final TokenStream tokenStream(String s, Reader r) { return null; } + @Override public final TokenStream reusableTokenStream(String s, Reader r) { return null; } } static final class TestAnalyzer2 extends Analyzer { + @Override public TokenStream tokenStream(String s, Reader r) { return null; } + @Override public TokenStream reusableTokenStream(String s, Reader r) { return null; } } static class TestAnalyzer3 extends Analyzer { + @Override public TokenStream tokenStream(String s, Reader r) { return null; } + @Override public TokenStream reusableTokenStream(String s, Reader r) { return null; } } static class TestAnalyzer4 extends Analyzer { + @Override public final TokenStream tokenStream(String s, Reader r) { return null; } + @Override public TokenStream reusableTokenStream(String s, Reader r) { return null; } } static class TestTokenStream1 extends TokenStream { + @Override public final boolean incrementToken() { return false; } } static final class TestTokenStream2 extends TokenStream { + @Override public boolean incrementToken() { return false; } } static class TestTokenStream3 extends TokenStream { + @Override public boolean incrementToken() { return false; } } diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java index fa12956937c..9987e6f0e6e 100644 --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java +++ 
b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java @@ -18,6 +18,7 @@ package org.apache.lucene; */ import org.apache.lucene.util.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.index.*; import org.apache.lucene.document.*; import org.apache.lucene.search.*; @@ -64,6 +65,7 @@ public class TestExternalCodecs extends LuceneTestCase { return t2.length-t1.length; } + @Override public boolean equals(Object other) { return this == other; } @@ -103,6 +105,8 @@ public class TestExternalCodecs extends LuceneTestCase { static class RAMField extends Terms { final String field; final SortedMap termToDocs = new TreeMap(); + long sumTotalTermFreq; + RAMField(String field) { this.field = field; } @@ -112,6 +116,11 @@ public class TestExternalCodecs extends LuceneTestCase { return termToDocs.size(); } + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + @Override public TermsEnum iterator() { return new RAMTermsEnum(RAMOnlyCodec.RAMField.this); @@ -125,6 +134,7 @@ public class TestExternalCodecs extends LuceneTestCase { static class RAMTerm { final String term; + long totalTermFreq; final List docs = new ArrayList(); public RAMTerm(String term) { this.term = term; @@ -197,14 +207,16 @@ public class TestExternalCodecs extends LuceneTestCase { } @Override - public void finishTerm(BytesRef text, int numDocs) { - assert numDocs > 0; - assert numDocs == current.docs.size(); + public void finishTerm(BytesRef text, TermStats stats) { + assert stats.docFreq > 0; + assert stats.docFreq == current.docs.size(); + current.totalTermFreq = stats.totalTermFreq; field.termToDocs.put(current.term, current); } @Override - public void finish() { + public void finish(long sumTotalTermFreq) { + field.sumTotalTermFreq = sumTotalTermFreq; } } @@ -243,7 +255,6 @@ public class TestExternalCodecs extends LuceneTestCase { } } - // Classes for reading from the postings state static class RAMFieldsEnum extends FieldsEnum { private final RAMPostings postings; @@ -344,7 +355,8 @@ public class TestExternalCodecs extends LuceneTestCase { } @Override - public void cacheCurrentTerm() { + public long totalTermFreq() { + return ramField.termToDocs.get(current).totalTermFreq; } @Override @@ -546,7 +558,7 @@ public class TestExternalCodecs extends LuceneTestCase { // Terms dict success = false; try { - FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator); + FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator); success = true; return ret; } finally { @@ -587,15 +599,15 @@ public class TestExternalCodecs extends LuceneTestCase { // Terms dict reader success = false; try { - FieldsProducer ret = new PrefixCodedTermsReader(indexReader, - state.dir, - state.fieldInfos, - state.segmentInfo.name, - pulsingReader, - state.readBufferSize, - reverseUnicodeComparator, - StandardCodec.TERMS_CACHE_SIZE, - state.codecId); + FieldsProducer ret = new BlockTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + pulsingReader, + state.readBufferSize, + reverseUnicodeComparator, + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); success = true; return ret; } finally { @@ -612,7 +624,7 @@ public class TestExternalCodecs extends LuceneTestCase { @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) throws IOException { StandardPostingsReader.files(dir, segmentInfo, codecId, files); - PrefixCodedTermsReader.files(dir, segmentInfo, 
codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); } @@ -633,13 +645,15 @@ public class TestExternalCodecs extends LuceneTestCase { final int NUM_DOCS = 173; - Directory dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); + dir.setCheckIndexOnClose(false); // we use a custom codec provider IndexWriter w = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, true, true)). setCodecProvider(provider). setMergePolicy(newLogMergePolicy(3)) ); + w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); // uses default codec: doc.add(newField("field1", "this field uses the standard codec as the test", Field.Store.NO, Field.Index.ANALYZED)); @@ -661,7 +675,7 @@ public class TestExternalCodecs extends LuceneTestCase { } w.deleteDocuments(new Term("id", "77")); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); IndexReader[] subs = r.getSequentialSubReaders(); // test each segment for(int i=0;i 94 && i < 105) ) { + if (i < 10 || (i > 94 && i < 105) ) { Document d = searcher.doc(hits[i].doc); assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD)); } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java b/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java index e86307aab2c..586848b373b 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java @@ -38,13 +38,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase { final BytesRef bytes = new BytesRef(); stream.reset(); assertEquals(64, numericAtt.getValueSize()); - assertEquals(lvalue, numericAtt.getRawValue()); for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); assertEquals("Shift value wrong", shift, numericAtt.getShift()); final int hash = bytesAtt.toBytesRef(bytes); assertEquals("Hash incorrect", bytes.hashCode(), hash); assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes)); + assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue()); assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } assertFalse("More tokens available", stream.incrementToken()); @@ -61,13 +61,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase { final BytesRef bytes = new BytesRef(); stream.reset(); assertEquals(32, numericAtt.getValueSize()); - assertEquals(ivalue, numericAtt.getRawValue()); for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); assertEquals("Shift value wrong", shift, numericAtt.getShift()); final int hash = bytesAtt.toBytesRef(bytes); assertEquals("Hash incorrect", bytes.hashCode(), hash); assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes)); + assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue()); assertEquals("Type incorrect", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } assertFalse("More tokens available", stream.incrementToken()); diff --git a/lucene/src/test/org/apache/lucene/analysis/TestToken.java b/lucene/src/test/org/apache/lucene/analysis/TestToken.java index 94ab03dfb5f..29edc25a40a 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestToken.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestToken.java @@ -22,8 +22,11 @@ import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.io.StringReader; +import java.util.HashMap; public class TestToken extends LuceneTestCase { @@ -241,6 +244,22 @@ public class TestToken extends LuceneTestCase { ts.addAttribute(TypeAttribute.class) instanceof Token); } + public void testAttributeReflection() throws Exception { + Token t = new Token("foobar", 6, 22, 8); + _TestUtil.assertAttributeReflection(t, + new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + put(OffsetAttribute.class.getName() + "#startOffset", 6); + put(OffsetAttribute.class.getName() + "#endOffset", 22); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); + put(PayloadAttribute.class.getName() + "#payload", null); + put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE); + put(FlagsAttribute.class.getName() + "#flags", 8); + }}); + } + + public static T assertCloneIsEqual(T att) { @SuppressWarnings("unchecked") T clone = (T) att.clone(); diff --git a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java index 622f83d6e68..1d2ab4371b7 100644 --- a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java +++ b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java @@ -19,7 +19,10 @@ package org.apache.lucene.analysis.tokenattributes; import org.apache.lucene.analysis.TestToken; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.nio.CharBuffer; +import java.util.HashMap; import java.util.Formatter; import java.util.Locale; import java.util.regex.Pattern; @@ -126,6 +129,15 @@ public class TestCharTermAttributeImpl extends LuceneTestCase { assertNotSame(buf, copy.buffer()); } + public void testAttributeReflection() throws Exception { + CharTermAttributeImpl t = new CharTermAttributeImpl(); + t.append("foobar"); + _TestUtil.assertAttributeReflection(t, new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + }}); + } + public void testCharSequenceInterface() { final String s = "0123456789"; final CharTermAttributeImpl t = new CharTermAttributeImpl(); @@ -215,6 +227,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase { public char charAt(int i) { return longTestString.charAt(i); } public int length() { return longTestString.length(); } public CharSequence subSequence(int start, int end) { return longTestString.subSequence(start, end); } + @Override public String toString() { 
return longTestString; } }); assertEquals("4567890123456"+longTestString, t.toString()); diff --git a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java new file mode 100644 index 00000000000..b8e9a0df7e2 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java @@ -0,0 +1,46 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.Collections; +import java.util.HashMap; + +public class TestSimpleAttributeImpl extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(), + Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1)); + _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(), + Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0)); + _TestUtil.assertAttributeReflection(new TypeAttributeImpl(), + Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE)); + _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(), + Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null)); + _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(), + Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false)); + _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap() {{ + put(OffsetAttribute.class.getName()+"#startOffset", 0); + put(OffsetAttribute.class.getName()+"#endOffset", 0); + }}); + } + +} diff --git a/lucene/src/test/org/apache/lucene/document/TestDocument.java b/lucene/src/test/org/apache/lucene/document/TestDocument.java index 51a71bf51dc..c505df68c94 100644 --- a/lucene/src/test/org/apache/lucene/document/TestDocument.java +++ b/lucene/src/test/org/apache/lucene/document/TestDocument.java @@ -156,7 +156,7 @@ public class TestDocument extends LuceneTestCase { writer.addDocument(makeDocumentWithFields()); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // search for something that does exists Query query = new TermQuery(new Term("keyword", "test1")); @@ -238,7 +238,7 @@ public class TestDocument extends LuceneTestCase { writer.addDocument(doc); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); Query query = new 
TermQuery(new Term("keyword", "test")); diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index 8cafb7e110d..5ed6c089912 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -55,6 +55,7 @@ public class TestAddIndexes extends LuceneTestCase { writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) .setOpenMode(OpenMode.CREATE)); + writer.setInfoStream(VERBOSE ? System.out : null); // add 100 documents addDocs(writer, 100); assertEquals(100, writer.maxDoc()); @@ -156,6 +157,7 @@ public class TestAddIndexes extends LuceneTestCase { setUpDirs(dir, aux); IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer.setInfoStream(VERBOSE ? System.out : null); writer.addIndexes(aux); // Adds 10 docs, then replaces them with another 10 @@ -427,7 +429,7 @@ public class TestAddIndexes extends LuceneTestCase { ); writer.addIndexes(aux, new MockDirectoryWrapper(random, new RAMDirectory(aux))); - assertEquals(1060, writer.maxDoc()); + assertEquals(1020, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); dir.close(); @@ -451,6 +453,7 @@ public class TestAddIndexes extends LuceneTestCase { setMaxBufferedDocs(100). setMergePolicy(newLogMergePolicy(10)) ); + writer.setInfoStream(VERBOSE ? System.out : null); writer.addIndexes(aux); assertEquals(30, writer.maxDoc()); assertEquals(3, writer.getSegmentCount()); @@ -479,7 +482,7 @@ public class TestAddIndexes extends LuceneTestCase { ); writer.addIndexes(aux, aux2); - assertEquals(1060, writer.maxDoc()); + assertEquals(1040, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java index b5f44752178..95da21de23e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java +++ b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java @@ -131,6 +131,7 @@ public class TestAtomicUpdate extends LuceneTestCase { .setMaxBufferedDocs(7); ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3); IndexWriter writer = new MockIndexWriter(directory, conf); + writer.setInfoStream(VERBOSE ? System.out : null); // Establish a base index of 100 docs: for(int i=0;i<100;i++) { diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index e85e4fb4bc5..39eeef0f81b 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -17,20 +17,13 @@ package org.apache.lucene.index; * limitations under the License. 
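The attribute-reflection tests added earlier in this patch (TestToken#testAttributeReflection, TestCharTermAttributeImpl#testAttributeReflection and the new TestSimpleAttributeImpl) all compare an attribute's reflected state against a map keyed as "<attribute interface name>#<property>". Purely as an illustration of that convention, and not part of the patch, a helper along the lines of _TestUtil.assertAttributeReflection could be built on AttributeImpl.reflectWith(AttributeReflector); both the class name below and the assumption that reflectWith is the entry point belong to this sketch, not to the patch:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.util.Attribute;
    import org.apache.lucene.util.AttributeImpl;
    import org.apache.lucene.util.AttributeReflector;

    // Illustrative helper only: collect every (interface, key, value) triple the attribute
    // reports and compare it to the expected map, using the "interfaceName#key" convention
    // the tests above rely on.
    public final class AttributeReflectionSketch {
      private AttributeReflectionSketch() {}

      public static Map<String, Object> reflectAsMap(AttributeImpl att) {
        final Map<String, Object> actual = new HashMap<String, Object>();
        att.reflectWith(new AttributeReflector() {
          public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            actual.put(attClass.getName() + "#" + key, value);
          }
        });
        return actual;
      }

      public static void assertAttributeReflection(AttributeImpl att, Map<String, ?> expected) {
        Map<String, Object> actual = reflectAsMap(att);
        if (!expected.equals(actual)) {
          throw new AssertionError("reflection mismatch: expected=" + expected + " actual=" + actual);
        }
      }
    }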
*/ -import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.io.PrintStream; import java.util.Arrays; -import java.util.Enumeration; import java.util.List; import java.util.Random; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -38,11 +31,13 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -76,39 +71,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } */ - /* Unzips zipName --> dirName, removing dirName - first */ - public void unzip(File zipName, String destDirName) throws IOException { - - ZipFile zipFile = new ZipFile(zipName); - - Enumeration entries = zipFile.entries(); - - String dirName = fullDir(destDirName); - - File fileDir = new File(dirName); - rmDir(destDirName); - - fileDir.mkdir(); - - while (entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); - - InputStream in = zipFile.getInputStream(entry); - OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(fileDir, entry.getName()))); - - byte[] buffer = new byte[8192]; - int len; - while((len = in.read(buffer)) >= 0) { - out.write(buffer, 0, len); - } - - in.close(); - out.close(); - } - - zipFile.close(); - } /* public void testCreateCFS() throws IOException { String dirName = "testindex.cfs"; @@ -151,10 +113,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: index " + unsupportedNames[i]); } - unzip(getDataFile("unsupported." + unsupportedNames[i] + ".zip"), unsupportedNames[i]); - - String fullPath = fullDir(unsupportedNames[i]); - Directory dir = newFSDirectory(new File(fullPath)); + File oldIndxeDir = _TestUtil.getTempDir(unsupportedNames[i]); + _TestUtil.unzip(getDataFile("unsupported." + unsupportedNames[i] + ".zip"), oldIndxeDir); + Directory dir = newFSDirectory(oldIndxeDir); IndexReader reader = null; IndexWriter writer = null; @@ -170,15 +131,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { try { writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMergeScheduler(new SerialMergeScheduler()) // no threads! - ); - // TODO: Make IndexWriter fail on open! 
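TestBackwardsCompatibility above drops its private unzip/rmDir helpers in favour of the shared _TestUtil.getTempDir and _TestUtil.unzip(File, File) calls. For reference, a self-contained sketch of such an unzip-to-directory utility using only java.util.zip; the class name is invented here, and the real _TestUtil implementation may differ in details:

    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.util.Enumeration;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipFile;

    // Extract every entry of a zip archive below destDir, creating directories as needed.
    public final class UnzipSketch {
      private UnzipSketch() {}

      public static void unzip(File zipName, File destDir) throws IOException {
        ZipFile zipFile = new ZipFile(zipName);
        try {
          destDir.mkdirs();
          Enumeration<? extends ZipEntry> entries = zipFile.entries();
          while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            File target = new File(destDir, entry.getName());
            if (entry.isDirectory()) {
              target.mkdirs();
              continue;
            }
            target.getParentFile().mkdirs();
            InputStream in = zipFile.getInputStream(entry);
            OutputStream out = new BufferedOutputStream(new FileOutputStream(target));
            try {
              byte[] buffer = new byte[8192];
              int len;
              while ((len = in.read(buffer)) >= 0) {
                out.write(buffer, 0, len);
              }
            } finally {
              in.close();
              out.close();
            }
          }
        } finally {
          zipFile.close();
        }
      }
    }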
- if (random.nextBoolean()) { - writer.optimize(); - } else { - reader = writer.getReader(); - } + TEST_VERSION_CURRENT, new MockAnalyzer())); fail("IndexWriter creation should not pass for "+unsupportedNames[i]); } catch (IndexFormatTooOldException e) { // pass @@ -187,17 +140,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase { e.printStackTrace(System.out); } } finally { - if (reader != null) reader.close(); - reader = null; + // we should fail to open IW, and so it should be null when we get here. + // However, if the test fails (i.e., IW did not fail on open), we need + // to close IW. However, if merges are run, IW may throw + // IndexFormatTooOldException, and we don't want to mask the fail() + // above, so close without waiting for merges. if (writer != null) { - try { - writer.close(); - } catch (IndexFormatTooOldException e) { - // OK -- since IW gives merge scheduler a chance - // to merge at close, it's possible and fine to - // hit this exc here - writer.close(false); - } + writer.close(false); } writer = null; } @@ -210,34 +159,37 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName())); dir.close(); - rmDir(unsupportedNames[i]); + _TestUtil.rmDir(oldIndxeDir); } } public void testOptimizeOldIndex() throws Exception { for(int i=0;i commits) throws IOException { + if (VERBOSE) { + System.out.println("TEST: onInit"); + } verifyCommitOrder(commits); numOnInit++; // do no deletions on init @@ -126,6 +129,9 @@ public class TestDeletionPolicy extends LuceneTestCase { } public void onCommit(List commits) throws IOException { + if (VERBOSE) { + System.out.println("TEST: onCommit"); + } verifyCommitOrder(commits); doDeletes(commits, true); } @@ -200,8 +206,10 @@ public class TestDeletionPolicy extends LuceneTestCase { IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) .setIndexDeletionPolicy(policy); - LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(true); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(true); + } IndexWriter writer = new IndexWriter(dir, conf); writer.close(); @@ -215,8 +223,10 @@ public class TestDeletionPolicy extends LuceneTestCase { conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode( OpenMode.APPEND).setIndexDeletionPolicy(policy); - lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(true); + mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(true); + } writer = new IndexWriter(dir, conf); for(int j=0;j<17;j++) { addDoc(writer); @@ -280,6 +290,10 @@ public class TestDeletionPolicy extends LuceneTestCase { public void testKeepAllDeletionPolicy() throws IOException { for(int pass=0;pass<2;pass++) { + if (VERBOSE) { + System.out.println("TEST: cycle pass=" + pass); + } + boolean useCompoundFile = (pass % 2) != 0; // Never deletes a commit @@ -292,34 +306,48 @@ public class TestDeletionPolicy extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer()) .setIndexDeletionPolicy(policy).setMaxBufferedDocs(10) .setMergeScheduler(new SerialMergeScheduler()); - LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); - lmp.setMergeFactor(10); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) 
mp).setUseCompoundFile(useCompoundFile); + } IndexWriter writer = new IndexWriter(dir, conf); for(int i=0;i<107;i++) { addDoc(writer); } writer.close(); - conf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setOpenMode( - OpenMode.APPEND).setIndexDeletionPolicy(policy); - lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); - writer = new IndexWriter(dir, conf); - writer.optimize(); - writer.close(); - - assertEquals(1, policy.numOnInit); + final boolean isOptimized; + { + IndexReader r = IndexReader.open(dir); + isOptimized = r.isOptimized(); + r.close(); + } + if (!isOptimized) { + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer()).setOpenMode( + OpenMode.APPEND).setIndexDeletionPolicy(policy); + mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); + } + if (VERBOSE) { + System.out.println("TEST: open writer for optimize"); + } + writer = new IndexWriter(dir, conf); + writer.setInfoStream(VERBOSE ? System.out : null); + writer.optimize(); + writer.close(); + } + assertEquals(isOptimized ? 0:1, policy.numOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): - assertEquals(2, policy.numOnCommit); + assertEquals(1 + (isOptimized ? 0:1), policy.numOnCommit); // Test listCommits Collection commits = IndexReader.listCommits(dir); // 2 from closing writer - assertEquals(2, commits.size()); + assertEquals(1 + (isOptimized ? 0:1), commits.size()); // Make sure we can open a reader on each commit: for (final IndexCommit commit : commits) { @@ -480,8 +508,10 @@ public class TestDeletionPolicy extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer()) .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); - LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); + } IndexWriter writer = new IndexWriter(dir, conf); for(int i=0;i<107;i++) { addDoc(writer); @@ -490,8 +520,10 @@ public class TestDeletionPolicy extends LuceneTestCase { conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy); - lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); + mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(true); + } writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); @@ -529,8 +561,10 @@ public class TestDeletionPolicy extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer()) .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy) .setMaxBufferedDocs(10); - LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); + } IndexWriter writer = new IndexWriter(dir, conf); for(int i=0;i<17;i++) { addDoc(writer); @@ -585,47 +619,65 @@ public class TestDeletionPolicy extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy); - LogMergePolicy lmp = (LogMergePolicy) 
conf.getMergePolicy(); - lmp.setUseCompoundFile(useCompoundFile); + .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newInOrderLogMergePolicy()); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); + } IndexWriter writer = new IndexWriter(dir, conf); writer.close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for(int i=0;i positions = new ArrayList(); + StringBuilder builder = new StringBuilder(); + for (int j = 0; j < 3049; j++) { + int nextInt = random.nextInt(max); + builder.append(nextInt).append(" "); + if (nextInt == term) { + positions.add(Integer.valueOf(j)); + } + } + if (positions.size() == 0) { + builder.append(term); + positions.add(3049); + } + doc.add(newField(fieldName, builder.toString(), Field.Store.YES, + Field.Index.ANALYZED)); + positionsInDoc[i] = positions.toArray(new Integer[0]); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + writer.close(); + + for (int i = 0; i < 39 * RANDOM_MULTIPLIER; i++) { + BytesRef bytes = new BytesRef("" + term); + ReaderContext topReaderContext = reader.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext atomicReaderContext : leaves) { + DocsAndPositionsEnum docsAndPosEnum = getDocsAndPositions( + atomicReaderContext.reader, bytes, null); + assertNotNull(docsAndPosEnum); + int initDoc = 0; + int maxDoc = atomicReaderContext.reader.maxDoc(); + // initially advance or do next doc + if (random.nextBoolean()) { + initDoc = docsAndPosEnum.nextDoc(); + } else { + initDoc = docsAndPosEnum.advance(random.nextInt(maxDoc)); + } + // now run through the scorer and check if all positions are there... + do { + int docID = docsAndPosEnum.docID(); + if (docID == DocsAndPositionsEnum.NO_MORE_DOCS) { + break; + } + Integer[] pos = positionsInDoc[atomicReaderContext.docBase + docID]; + assertEquals(pos.length, docsAndPosEnum.freq()); + // number of positions read should be random - don't read all of them + // allways + final int howMany = random.nextInt(20) == 0 ? 
pos.length + - random.nextInt(pos.length) : pos.length; + for (int j = 0; j < howMany; j++) { + assertEquals("iteration: " + i + " initDoc: " + initDoc + " doc: " + + docID + " base: " + atomicReaderContext.docBase + + " positions: " + Arrays.toString(pos) + " usePayloads: " + + usePayload, pos[j].intValue(), docsAndPosEnum.nextPosition()); + } + + if (random.nextInt(10) == 0) { // once is a while advance + docsAndPosEnum + .advance(docID + 1 + random.nextInt((maxDoc - docID))); + } + + } while (docsAndPosEnum.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS); + } + + } + reader.close(); + dir.close(); + } + + public void testRandomDocs() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( + MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy())); + int numDocs = 499; + int max = 15678; + int term = random.nextInt(max); + int[] freqInDoc = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + StringBuilder builder = new StringBuilder(); + for (int j = 0; j < 199; j++) { + int nextInt = random.nextInt(max); + builder.append(nextInt).append(" "); + if (nextInt == term) { + freqInDoc[i]++; + } + } + doc.add(newField(fieldName, builder.toString(), Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + writer.close(); + + for (int i = 0; i < 39 * RANDOM_MULTIPLIER; i++) { + BytesRef bytes = new BytesRef("" + term); + ReaderContext topReaderContext = reader.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext context : leaves) { + int maxDoc = context.reader.maxDoc(); + DocsEnum docsAndPosEnum = getDocsEnum(context.reader, bytes, true, null); + if (findNext(freqInDoc, context.docBase, context.docBase + maxDoc) == Integer.MAX_VALUE) { + assertNull(docsAndPosEnum); + continue; + } + assertNotNull(docsAndPosEnum); + docsAndPosEnum.nextDoc(); + for (int j = 0; j < maxDoc; j++) { + if (freqInDoc[context.docBase + j] != 0) { + assertEquals(j, docsAndPosEnum.docID()); + assertEquals(docsAndPosEnum.freq(), freqInDoc[context.docBase +j]); + if (i % 2 == 0 && random.nextInt(10) == 0) { + int next = findNext(freqInDoc, context.docBase+j+1, context.docBase + maxDoc) - context.docBase; + int advancedTo = docsAndPosEnum.advance(next); + if (next >= maxDoc) { + assertEquals(DocsEnum.NO_MORE_DOCS, advancedTo); + } else { + assertTrue("advanced to: " +advancedTo + " but should be <= " + next, next >= advancedTo); + } + } else { + docsAndPosEnum.nextDoc(); + } + } + } + assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsAndPosEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsAndPosEnum.docID()); + } + + } + + reader.close(); + dir.close(); + } + + private static int findNext(int[] docs, int pos, int max) { + for (int i = pos; i < max; i++) { + if( docs[i] != 0) { + return i; + } + } + return Integer.MAX_VALUE; + } + + /** + * tests retrieval of positions for terms that have a large number of + * occurrences to force test of buffer refill during positions iteration. 
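The new TestDocsAndPositions cases above all follow the same iteration pattern over a per-segment postings enum obtained for a single term (via the test's getDocsAndPositions helper): step or advance through documents, then read up to freq() positions for each document. A condensed, standalone version of that loop, mirroring the calls used in the tests; only the class and method names are invented:

    import java.io.IOException;
    import org.apache.lucene.index.DocsAndPositionsEnum;

    // Walk every document and position reported by a DocsAndPositionsEnum for one term.
    public final class PositionsWalkSketch {
      private PositionsWalkSketch() {}

      public static void walk(DocsAndPositionsEnum postings) throws IOException {
        int doc;
        while ((doc = postings.nextDoc()) != DocsAndPositionsEnum.NO_MORE_DOCS) {
          int freq = postings.freq();            // number of positions available for this doc
          for (int i = 0; i < freq; i++) {
            int position = postings.nextPosition();
            System.out.println("doc=" + doc + " position=" + position);
          }
        }
      }
    }

The tests above deliberately do not always read all freq() positions, and sometimes advance() instead of calling nextDoc(), so that buffer refills and skipping in the postings implementations get exercised.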
+ */ + public void testLargeNumberOfPositions() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( + MockTokenizer.WHITESPACE, true, usePayload))); + int howMany = 1000; + for (int i = 0; i < 39; i++) { + Document doc = new Document(); + StringBuilder builder = new StringBuilder(); + for (int j = 0; j < howMany; j++) { + if (j % 2 == 0) { + builder.append("even "); + } else { + builder.append("odd "); + } + } + doc.add(newField(fieldName, builder.toString(), Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + // now do seaches + IndexReader reader = writer.getReader(); + writer.close(); + + for (int i = 0; i < 39 * RANDOM_MULTIPLIER; i++) { + BytesRef bytes = new BytesRef("even"); + + ReaderContext topReaderContext = reader.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext atomicReaderContext : leaves) { + DocsAndPositionsEnum docsAndPosEnum = getDocsAndPositions( + atomicReaderContext.reader, bytes, null); + assertNotNull(docsAndPosEnum); + + int initDoc = 0; + int maxDoc = atomicReaderContext.reader.maxDoc(); + // initially advance or do next doc + if (random.nextBoolean()) { + initDoc = docsAndPosEnum.nextDoc(); + } else { + initDoc = docsAndPosEnum.advance(random.nextInt(maxDoc)); + } + String msg = "Iteration: " + i + " initDoc: " + initDoc + " payloads: " + + usePayload; + assertEquals(howMany / 2, docsAndPosEnum.freq()); + for (int j = 0; j < howMany; j += 2) { + assertEquals("position missmatch index: " + j + " with freq: " + + docsAndPosEnum.freq() + " -- " + msg, j, + docsAndPosEnum.nextPosition()); + } + } + } + reader.close(); + dir.close(); + } + +} diff --git a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java index 15e23ce886e..94ed396e8fa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -51,7 +51,7 @@ public class TestFieldsReader extends LuceneTestCase { DocHelper.setupDoc(testDoc); fieldInfos.add(testDoc); dir = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, conf); writer.addDocument(testDoc); @@ -291,7 +291,7 @@ public class TestFieldsReader extends LuceneTestCase { Directory tmpDir = newFSDirectory(file); assertTrue(tmpDir != null); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(tmpDir, conf); writer.addDocument(testDoc); diff --git a/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java index 5f4dfd36e80..c17dc38b9aa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java @@ -38,9 
+38,11 @@ public class TestFilterIndexReader extends LuceneTestCase { TestFields(Fields in) { super(in); } + @Override public FieldsEnum iterator() throws IOException { return new TestFieldsEnum(super.iterator()); } + @Override public Terms terms(String field) throws IOException { return new TestTerms(super.terms(field)); } @@ -51,6 +53,7 @@ public class TestFilterIndexReader extends LuceneTestCase { super(in); } + @Override public TermsEnum iterator() throws IOException { return new TestTermsEnum(super.iterator()); } @@ -61,6 +64,7 @@ public class TestFilterIndexReader extends LuceneTestCase { super(in); } + @Override public TermsEnum terms() throws IOException { return new TestTermsEnum(super.terms()); } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index bdaa1dbe198..ea514a861ea 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -18,6 +18,8 @@ package org.apache.lucene.index; */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -66,9 +68,9 @@ public class TestIndexFileDeleter extends LuceneTestCase { Term searchTerm = new Term("id", "7"); int delCount = reader.deleteDocuments(searchTerm); assertEquals("didn't delete the right number of documents", 1, delCount); - + Similarity sim = new DefaultSimilarity().get("content"); // Set one norm so we get a .s0 file: - reader.setNorm(21, "content", (float) 1.5); + reader.setNorm(21, "content", sim.encodeNormValue(1.5f)); reader.close(); // Now, artificially create an extra .del file & extra diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index e75d17c28dc..7b3399cfe6f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -29,7 +29,7 @@ import java.util.Map; import java.util.HashMap; import java.util.Set; import java.util.SortedSet; - +import org.junit.Assume; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -39,8 +39,10 @@ import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Similarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; @@ -329,6 +331,7 @@ public class TestIndexReader extends LuceneTestCase // add 100 documents with term : aaa writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + writer.setInfoStream(VERBOSE ? 
System.out : null); for (int i = 0; i < 100; i++) { addDoc(writer, searchTerm.text()); } @@ -357,7 +360,7 @@ public class TestIndexReader extends LuceneTestCase // CREATE A NEW READER and re-test reader = IndexReader.open(dir, false); - assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); + assertEquals("deleted docFreq", 0, reader.docFreq(searchTerm)); assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); reader.close(); reader2.close(); @@ -368,7 +371,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); @@ -377,7 +380,7 @@ public class TestIndexReader extends LuceneTestCase addDocumentWithTermVectorFields(writer); } writer.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new Document(); doc.add(new Field("bin1", bin)); doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED)); @@ -414,7 +417,7 @@ public class TestIndexReader extends LuceneTestCase // force optimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); writer.optimize(); writer.close(); reader = IndexReader.open(dir, false); @@ -462,8 +465,9 @@ public class TestIndexReader extends LuceneTestCase // expected } + Similarity sim = new DefaultSimilarity().get("aaa"); try { - reader.setNorm(5, "aaa", 2.0f); + reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f)); fail("setNorm after close failed to throw IOException"); } catch (AlreadyClosedException e) { // expected @@ -502,8 +506,9 @@ public class TestIndexReader extends LuceneTestCase // expected } + Similarity sim = new DefaultSimilarity().get("aaa"); try { - reader.setNorm(5, "aaa", 2.0f); + reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f)); fail("setNorm should have hit LockObtainFailedException"); } catch (LockObtainFailedException e) { // expected @@ -533,7 +538,8 @@ public class TestIndexReader extends LuceneTestCase // now open reader & set norm for doc 0 IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + Similarity sim = new DefaultSimilarity().get("content"); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); // we should be holding the write lock now: assertTrue("locked", IndexWriter.isLocked(dir)); @@ -547,7 +553,7 @@ public class TestIndexReader extends LuceneTestCase IndexReader reader2 = IndexReader.open(dir, false); // set norm again for doc 0 - reader.setNorm(0, "content", (float) 3.0); + reader.setNorm(0, "content", sim.encodeNormValue(3.0f)); assertTrue("locked", IndexWriter.isLocked(dir)); reader.close(); @@ -577,15 +583,16 @@ public class TestIndexReader extends LuceneTestCase addDoc(writer, searchTerm.text()); writer.close(); + 
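The setNorm calls above now pass a byte produced by the per-field Similarity rather than a raw float, so the tests obtain the field's Similarity from a DefaultSimilarity provider and encode explicitly. A minimal sketch of that round trip, mirroring the calls used throughout this patch; note that the single-byte encoding is lossy, so decoding only approximately recovers the original boost:

    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.search.Similarity;

    public class NormEncodingSketch {
      public static void main(String[] args) {
        // Per-field Similarity, exactly as the tests above obtain it.
        Similarity sim = new DefaultSimilarity().get("content");
        byte encoded = sim.encodeNormValue(1.5f);     // the value IndexReader.setNorm now expects
        float decoded = sim.decodeNormValue(encoded); // roughly recovers the original float
        System.out.println("encoded=" + encoded + " decoded=" + decoded);
      }
    }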
Similarity sim = new DefaultSimilarity().get("content"); // now open reader & set norm for doc 0 (writes to // _0_1.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); // now open reader again & set norm for doc 0 (writes to _0_2.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); assertFalse("failed to remove first generation norms file on writing second generation", dir.fileExists("_0_1.s0")); @@ -690,7 +697,6 @@ public class TestIndexReader extends LuceneTestCase // CREATE A NEW READER and re-test reader = IndexReader.open(dir, false); - assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2)); assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100); @@ -831,7 +837,6 @@ public class TestIndexReader extends LuceneTestCase writer.close(); IndexReader reader = IndexReader.open(dir, false); reader.deleteDocument(0); - reader.deleteDocument(1); reader.close(); reader = IndexReader.open(dir, false); reader.undeleteAll(); @@ -848,7 +853,6 @@ public class TestIndexReader extends LuceneTestCase writer.close(); IndexReader reader = IndexReader.open(dir, false); reader.deleteDocument(0); - reader.deleteDocument(1); reader.close(); reader = IndexReader.open(dir, false); reader.undeleteAll(); @@ -880,6 +884,10 @@ public class TestIndexReader extends LuceneTestCase // First build up a starting index: MockDirectoryWrapper startDir = newDirectory(); IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + if (VERBOSE) { + System.out.println("TEST: create initial index"); + writer.setInfoStream(System.out); + } for(int i=0;i<157;i++) { Document d = new Document(); d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); @@ -890,6 +898,20 @@ public class TestIndexReader extends LuceneTestCase } writer.close(); + { + IndexReader r = IndexReader.open(startDir); + IndexSearcher searcher = newSearcher(r); + ScoreDoc[] hits = null; + try { + hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + } catch (IOException e) { + e.printStackTrace(); + fail("exception when init searching: " + e); + } + searcher.close(); + r.close(); + } + long diskUsage = startDir.getRecomputedActualSizeInBytes(); long diskFree = diskUsage+100; @@ -947,13 +969,13 @@ public class TestIndexReader extends LuceneTestCase dir.setMaxSizeInBytes(thisDiskFree); dir.setRandomIOExceptionRate(rate); - + Similarity sim = new DefaultSimilarity().get("content"); try { if (0 == x) { int docId = 12; for(int i=0;i<13;i++) { reader.deleteDocument(docId); - reader.setNorm(docId, "content", (float) 2.0); + reader.setNorm(docId, "content", sim.encodeNormValue(2.0f)); docId += 12; } } @@ -975,14 +997,6 @@ public class TestIndexReader extends LuceneTestCase } } - // Whether we succeeded or failed, check that all - // un-referenced files were in fact deleted (ie, - // we did not create garbage). 
Just create a - // new IndexFileDeleter, have it delete - // unreferenced files, then verify that in fact - // no files were deleted: - TestIndexWriter.assertNoUnreferencedFiles(dir, "reader.close() failed to delete unreferenced files"); - // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs @@ -1010,7 +1024,7 @@ public class TestIndexReader extends LuceneTestCase } */ - IndexSearcher searcher = new IndexSearcher(newReader); + IndexSearcher searcher = newSearcher(newReader); ScoreDoc[] hits = null; try { hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; @@ -1110,8 +1124,9 @@ public class TestIndexReader extends LuceneTestCase } reader = IndexReader.open(dir, false); + Similarity sim = new DefaultSimilarity().get("content"); try { - reader.setNorm(1, "content", (float) 2.0); + reader.setNorm(1, "content", sim.encodeNormValue(2.0f)); fail("did not hit exception when calling setNorm on an invalid doc number"); } catch (ArrayIndexOutOfBoundsException e) { // expected @@ -1148,7 +1163,7 @@ public class TestIndexReader extends LuceneTestCase public void testMultiReaderDeletes() throws Exception { Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter(random, dir); + RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new Document(); doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED)); w.addDocument(doc); @@ -1264,9 +1279,6 @@ public class TestIndexReader extends LuceneTestCase // Open another reader to confirm that everything is deleted reader2 = IndexReader.open(dir, false); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1)); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2)); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm3)); assertTermDocsCount("reopened 2", reader2, searchTerm1, 0); assertTermDocsCount("reopened 2", reader2, searchTerm2, 0); assertTermDocsCount("reopened 2", reader2, searchTerm3, 100); @@ -1862,4 +1874,65 @@ public class TestIndexReader extends LuceneTestCase assertTrue(IndexReader.indexExists(dir)); dir.close(); } + + // Make sure totalTermFreq works correctly in the terms + // dict cache + public void testTotalTermFreqCached() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + Document d = new Document(); + d.add(newField("f", "a a b", Field.Index.ANALYZED)); + writer.addDocument(d); + IndexReader r = writer.getReader(); + writer.close(); + Terms terms = MultiFields.getTerms(r, "f"); + try { + // Make sure codec impls totalTermFreq (eg PreFlex doesn't) + Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1); + assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); + assertEquals(2, terms.totalTermFreq(new BytesRef("a"))); + assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); + } finally { + r.close(); + dir.close(); + } + } + + // LUCENE-2474 + public void testReaderFinishedListener() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3); + writer.setInfoStream(VERBOSE ? 
System.out : null); + writer.addDocument(new Document()); + writer.commit(); + writer.addDocument(new Document()); + writer.commit(); + final IndexReader reader = writer.getReader(); + final int[] closeCount = new int[1]; + final IndexReader.ReaderFinishedListener listener = new IndexReader.ReaderFinishedListener() { + public void finished(IndexReader reader) { + closeCount[0]++; + } + }; + + reader.addReaderFinishedListener(listener); + + reader.close(); + + // Just the top reader + assertEquals(1, closeCount[0]); + writer.close(); + + // Now also the subs + assertEquals(3, closeCount[0]); + + IndexReader reader2 = IndexReader.open(dir); + reader2.addReaderFinishedListener(listener); + + closeCount[0] = 0; + reader2.close(); + assertEquals(3, closeCount[0]); + dir.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java index 9501c2dc3e2..7854aac9168 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.index.SegmentReader.Norm; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -272,13 +273,14 @@ public class TestIndexReaderClone extends LuceneTestCase { * @throws Exception */ private void performDefaultTests(IndexReader r1) throws Exception { - float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]); + Similarity sim = new DefaultSimilarity().get("field1"); + float norm1 = sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]); IndexReader pr1Clone = (IndexReader) r1.clone(); pr1Clone.deleteDocument(10); - pr1Clone.setNorm(4, "field1", 0.5f); - assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1); - assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); + pr1Clone.setNorm(4, "field1", sim.encodeNormValue(0.5f)); + assertTrue(sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1); + assertTrue(sim.decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); final Bits delDocs = MultiFields.getDeletedDocs(r1); assertTrue(delDocs == null || !delDocs.get(10)); @@ -327,7 +329,8 @@ public class TestIndexReaderClone extends LuceneTestCase { TestIndexReaderReopen.createIndex(random, dir1, false); SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false)); origSegmentReader.deleteDocument(1); - origSegmentReader.setNorm(4, "field1", 0.5f); + Similarity sim = new DefaultSimilarity().get("field1"); + origSegmentReader.setNorm(4, "field1", sim.encodeNormValue(0.5f)); SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader .clone(); @@ -426,8 +429,9 @@ public class TestIndexReaderClone extends LuceneTestCase { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); IndexReader orig = IndexReader.open(dir1, false); - orig.setNorm(1, "field1", 17.0f); - final byte encoded = Similarity.getDefault().encodeNormValue(17.0f); + Similarity sim = new DefaultSimilarity().get("field1"); + orig.setNorm(1, "field1", sim.encodeNormValue(17.0f)); + final byte encoded = sim.encodeNormValue(17.0f); assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]); // the cloned segmentreader 
should have 2 references, 1 to itself, and 1 to diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index 4a0e6b7162c..e2c4c017a95 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.SegmentReader.Norm; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -42,14 +43,15 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private class SimilarityOne extends DefaultSimilarity { @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + // diable length norm + return state.getBoost(); } } private static final int NUM_FIELDS = 10; - private Similarity similarityOne; + private SimilarityProvider similarityOne; private Analyzer anlzr; @@ -202,19 +204,20 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { IndexReader reader4C = (IndexReader) reader3C.clone(); SegmentReader segmentReader4C = getOnlySegmentReader(reader4C); assertEquals(4, reader3CCNorm.bytesRef().get()); - reader4C.setNorm(5, "field1", 0.33f); + Similarity sim = new DefaultSimilarity().get("field1"); + reader4C.setNorm(5, "field1", sim.encodeNormValue(0.33f)); // generate a cannot update exception in reader1 try { - reader3C.setNorm(1, "field1", 0.99f); + reader3C.setNorm(1, "field1", sim.encodeNormValue(0.99f)); fail("did not hit expected exception"); } catch (Exception ex) { // expected } // norm values should be different - assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5]) - != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5])); + assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5]) + != sim.decodeNormValue(segmentReader4C.norms("field1")[5])); Norm reader4CCNorm = segmentReader4C.norms.get("field1"); assertEquals(3, reader3CCNorm.bytesRef().get()); assertEquals(1, reader4CCNorm.bytesRef().get()); @@ -222,7 +225,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { IndexReader reader5C = (IndexReader) reader4C.clone(); SegmentReader segmentReader5C = getOnlySegmentReader(reader5C); Norm reader5CCNorm = segmentReader5C.norms.get("field1"); - reader5C.setNorm(5, "field1", 0.7f); + reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f)); assertEquals(1, reader5CCNorm.bytesRef().get()); reader5C.close(); @@ -236,7 +239,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private void createIndex(Random random, Directory dir) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(true); @@ -255,8 +258,9 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); 
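In TestIndexReaderCloneNorms above, SimilarityOne switches from overriding lengthNorm(String, int) to overriding computeNorm(String, FieldInvertState) and returns only the field boost, which disables length normalization. A standalone sketch of the same idea, mirroring the override in the patch; the class name is invented:

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.search.DefaultSimilarity;

    // Similarity whose norm ignores field length: the norm is just the accumulated field
    // boost, so long documents are not penalized relative to short ones.
    public class NoLengthNormSimilarity extends DefaultSimilarity {
      @Override
      public float computeNorm(String fieldName, FieldInvertState state) {
        return state.getBoost(); // drop the usual length-based factor entirely
      }
    }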
modifiedNorms.set(i, Float.valueOf(newNorm)); modifiedNorms.set(k, Float.valueOf(origNorm)); - ir.setNorm(i, "f" + 1, newNorm); - ir.setNorm(k, "f" + 1, origNorm); + Similarity sim = new DefaultSimilarity().get("f" + 1); + ir.setNorm(i, "f" + 1, sim.encodeNormValue(newNorm)); + ir.setNorm(k, "f" + 1, sim.encodeNormValue(origNorm)); // System.out.println("setNorm i: "+i); // break; } @@ -276,7 +280,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { assertEquals("number of norms mismatches", numDocNorms, b.length); ArrayList storedNorms = (i == 1 ? modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = Similarity.getDefault().decodeNormValue(b[j]); + Similarity sim = new DefaultSimilarity().get(field); + float norm = sim.decodeNormValue(b[j]); float norm1 = storedNorms.get(j).floatValue(); assertEquals("stored norm value of " + field + " for doc " + j + " is " + norm + " - a mismatch!", norm, norm1, 0.000001); @@ -288,7 +293,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { throws IOException { IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) - .setMaxBufferedDocs(5).setSimilarity(similarityOne); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne).setMergePolicy(newLogMergePolicy()); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(compound); @@ -302,7 +307,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { // create the next document private Document newDoc() { Document d = new Document(); - float boost = nextNorm(); + float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed for (int i = 0; i < 10; i++) { Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED); f.setBoost(boost); @@ -312,11 +317,12 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { } // return unique norm values that are unchanged by encoding/decoding - private float nextNorm() { + private float nextNorm(String fname) { float norm = lastNorm + normDelta; + Similarity sim = new DefaultSimilarity().get(fname); do { - float norm1 = Similarity.getDefault().decodeNormValue( - Similarity.getDefault().encodeNormValue(norm)); + float norm1 = sim.decodeNormValue( + sim.encodeNormValue(norm)); if (norm1 > lastNorm) { // System.out.println(norm1+" > "+lastNorm); norm = norm1; diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index ad9b3b30686..7e2e9e0c940 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -35,8 +35,11 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -171,7 +174,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { private void doTestReopenWithCommit (Random random, Directory dir, 
boolean withReopen) throws IOException { IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode( - OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler())); + OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newInOrderLogMergePolicy())); iwriter.commit(); IndexReader reader = IndexReader.open(dir, false); try { @@ -614,8 +617,9 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexReader reader2 = reader1.reopen(); modifier = IndexReader.open(dir1, false); - modifier.setNorm(1, "field1", 50); - modifier.setNorm(1, "field2", 50); + SimilarityProvider sim = new DefaultSimilarity(); + modifier.setNorm(1, "field1", sim.get("field1").encodeNormValue(50f)); + modifier.setNorm(1, "field2", sim.get("field2").encodeNormValue(50f)); modifier.close(); IndexReader reader3 = reader2.reopen(); @@ -708,7 +712,8 @@ public class TestIndexReaderReopen extends LuceneTestCase { protected void modifyIndex(int i) throws IOException { if (i % 3 == 0) { IndexReader modifier = IndexReader.open(dir, false); - modifier.setNorm(i, "field1", 50); + Similarity sim = new DefaultSimilarity().get("field1"); + modifier.setNorm(i, "field1", sim.encodeNormValue(50f)); modifier.close(); } else if (i % 3 == 1) { IndexReader modifier = IndexReader.open(dir, false); @@ -768,14 +773,14 @@ public class TestIndexReaderReopen extends LuceneTestCase { // not synchronized IndexReader refreshed = r.reopen(); - IndexSearcher searcher = new IndexSearcher(refreshed); + IndexSearcher searcher = newSearcher(refreshed); ScoreDoc[] hits = searcher.search( new TermQuery(new Term("field1", "a" + rnd.nextInt(refreshed.maxDoc()))), null, 1000).scoreDocs; if (hits.length > 0) { searcher.doc(hits[0].doc); } - + searcher.close(); if (refreshed != r) { refreshed.close(); } @@ -976,7 +981,11 @@ public class TestIndexReaderReopen extends LuceneTestCase { static void modifyIndex(int i, Directory dir) throws IOException { switch (i) { case 0: { + if (VERBOSE) { + System.out.println("TEST: modify index"); + } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + w.setInfoStream(VERBOSE ? 
System.out : null); w.deleteDocuments(new Term("field2", "a11")); w.deleteDocuments(new Term("field2", "b30")); w.close(); @@ -984,9 +993,10 @@ public class TestIndexReaderReopen extends LuceneTestCase { } case 1: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(4, "field1", 123); - reader.setNorm(44, "field2", 222); - reader.setNorm(44, "field4", 22); + SimilarityProvider sim = new DefaultSimilarity(); + reader.setNorm(4, "field1", sim.get("field1").encodeNormValue(123f)); + reader.setNorm(44, "field2", sim.get("field2").encodeNormValue(222f)); + reader.setNorm(44, "field4", sim.get("field4").encodeNormValue(22f)); reader.close(); break; } @@ -1007,8 +1017,9 @@ public class TestIndexReaderReopen extends LuceneTestCase { } case 4: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(5, "field1", 123); - reader.setNorm(55, "field2", 222); + SimilarityProvider sim = new DefaultSimilarity(); + reader.setNorm(5, "field1", sim.get("field1").encodeNormValue(123f)); + reader.setNorm(55, "field2", sim.get("field2").encodeNormValue(222f)); reader.close(); break; } @@ -1200,7 +1211,6 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexReader r = IndexReader.open(dir, false); assertEquals(0, r.numDocs()); - assertEquals(4, r.maxDoc()); Collection commits = IndexReader.listCommits(dir); for (final IndexCommit commit : commits) { diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index 7f84850b4d9..298fdcf9e8d 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; @@ -102,19 +101,12 @@ public class TestIndexWriter extends LuceneTestCase { } reader.close(); - // test doc count before segments are merged/index is optimized - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - assertEquals(100, writer.maxDoc()); - writer.close(); - reader = IndexReader.open(dir, true); - assertEquals(100, reader.maxDoc()); assertEquals(60, reader.numDocs()); reader.close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - assertEquals(100, writer.maxDoc()); assertEquals(60, writer.numDocs()); writer.optimize(); assertEquals(60, writer.maxDoc()); @@ -157,7 +149,7 @@ public class TestIndexWriter extends LuceneTestCase { String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); - new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, CodecProvider.getDefault()); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).rollback(); String[] endFiles = dir.listAll(); Arrays.sort(startFiles); @@ -261,7 +253,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testOptimizeTempSpaceUsage() throws IOException { MockDirectoryWrapper dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( 
TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(10)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(10).setMergePolicy(newLogMergePolicy())); if (VERBOSE) { System.out.println("TEST: config1=" + writer.getConfig()); } @@ -785,7 +777,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testHighFreqTerm() throws IOException { MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000000).setRAMBufferSizeMB(0.01)); + TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's StringBuilder b = new StringBuilder(1024*1024); for(int i=0;i<4096;i++) { @@ -969,7 +961,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMaxBufferedDocs(2); + .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); IndexWriter iw = new IndexWriter(dir, conf); Document document = new Document(); @@ -1011,7 +1003,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2); + .setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); lmp.setMaxMergeDocs(20); lmp.setMergeFactor(2); @@ -1034,11 +1026,11 @@ public class TestIndexWriter extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: iter=" + i); } - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); writer.setInfoStream(VERBOSE ? System.out : null); - LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); - lmp.setMergeFactor(2); - lmp.setUseCompoundFile(false); + //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); + //lmp.setMergeFactor(2); + //lmp.setUseCompoundFile(false); Document doc = new Document(); String contents = "aa bb cc dd ee ff gg hh ii jj kk"; @@ -1072,8 +1064,8 @@ public class TestIndexWriter extends LuceneTestCase { if (0 == i % 4) { writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); - lmp2.setUseCompoundFile(false); + //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); + //lmp2.setUseCompoundFile(false); writer.optimize(); writer.close(); } @@ -1098,10 +1090,7 @@ public class TestIndexWriter extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(2). - // have to use compound file to prevent running out of - // descripters when newDirectory returns a file-system - // backed directory: - setMergePolicy(newLogMergePolicy(false, 10)) + setMergePolicy(newLogMergePolicy()) ); writer.setInfoStream(VERBOSE ? 
System.out : null); @@ -1167,7 +1156,7 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); // Reopen - writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND)); + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); writer.setInfoStream(VERBOSE ? System.out : null); } writer.close(); @@ -1237,30 +1226,7 @@ public class TestIndexWriter extends LuceneTestCase { writer.close(); dir.close(); } - - // LUCENE-1084: test user-specified field length - public void testUserSpecifiedMaxFieldLength() throws IOException { - Directory dir = newDirectory(); - - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000)); - - Document doc = new Document(); - StringBuilder b = new StringBuilder(); - for(int i=0;i<10000;i++) - b.append(" a"); - b.append(" x"); - doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); - writer.addDocument(doc); - writer.close(); - - IndexReader reader = IndexReader.open(dir, true); - Term t = new Term("field", "x"); - assertEquals(1, reader.docFreq(t)); - reader.close(); - dir.close(); - } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { @@ -1268,8 +1234,8 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) .setMaxBufferedDocs(2).setRAMBufferSizeMB( - IndexWriterConfig.DISABLE_AUTO_FLUSH)); - + IndexWriterConfig.DISABLE_AUTO_FLUSH)); + writer.setInfoStream(VERBOSE ? 
System.out : null); Document document = new Document(); document = new Document(); @@ -1292,7 +1258,7 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(8, ir.numDocs()); ir.close(); - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); assertEquals(8, writer.numDocs()); assertEquals(10, writer.maxDoc()); writer.expungeDeletes(); @@ -1455,7 +1421,6 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); IndexReader ir = IndexReader.open(dir, true); - assertEquals(1, ir.maxDoc()); assertEquals(0, ir.numDocs()); ir.close(); @@ -2034,7 +1999,6 @@ public class TestIndexWriter extends LuceneTestCase { } IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(2); - ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); w = new IndexWriter(dir, conf); Document doc = new Document(); @@ -2253,8 +2217,6 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); - lmp.setUseCompoundFile(false); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); writer.setInfoStream(new PrintStream(bos)); writer.addDocument(new Document()); @@ -2271,7 +2233,8 @@ public class TestIndexWriter extends LuceneTestCase { final int NUM_THREADS = 5; final double RUN_SEC = 0.5; final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random, dir); + final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(w.w); w.commit(); final AtomicBoolean failed = new AtomicBoolean(); @@ -2613,7 +2576,7 @@ public class TestIndexWriter extends LuceneTestCase { count++; } } - assertTrue("flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + count, count > 2500); + assertTrue("flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + count, count > 1500); } w.close(); dir.close(); @@ -2653,9 +2616,11 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMaxBufferedDocs(2)); + .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); String[] files = dir.listAll(); + writer.setInfoStream(VERBOSE ? System.out : null); + // Creating over empty dir should not create any files, // or, at most the write.lock file final int extraFileCount; @@ -2677,9 +2642,10 @@ public class TestIndexWriter extends LuceneTestCase { doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); + // The second document should cause a flush. 
- assertTrue("flush should have occurred and files created", dir.listAll().length > 5 + extraFileCount); - + assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount); + // After rollback, IW should remove all files writer.rollback(); assertEquals("no files should exist in the directory after rollback", 0, dir.listAll().length); @@ -2851,7 +2817,7 @@ public class TestIndexWriter extends LuceneTestCase { for(int x=0;x<2;x++) { IndexReader r = w.getReader(); - IndexSearcher s = new IndexSearcher(r); + IndexSearcher s = newSearcher(r); if (VERBOSE) { System.out.println("TEST: cycle x=" + x + " r=" + r); @@ -2867,6 +2833,7 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i), doc.get("f"+i)); } } + s.close(); r.close(); w.optimize(); } @@ -2878,7 +2845,7 @@ public class TestIndexWriter extends LuceneTestCase { public void testNoUnwantedTVFiles() throws Exception { Directory dir = newDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01)); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy())); ((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false); String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg"; @@ -2907,4 +2874,36 @@ public class TestIndexWriter extends LuceneTestCase { dir.close(); } + + public void testDeleteAllSlowly() throws Exception { + final Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + final int NUM_DOCS = 1000 * RANDOM_MULTIPLIER; + final List ids = new ArrayList(NUM_DOCS); + for(int id=0;id= lastCount); lastCount = count; } @@ -799,7 +803,9 @@ public class TestIndexWriterReader extends LuceneTestCase { r = r2; } Query q = new TermQuery(new Term("indexname", "test")); - final int count = new IndexSearcher(r).search(q, 10).totalHits; + IndexSearcher searcher = newSearcher(r); + final int count = searcher.search(q, 10).totalHits; + searcher.close(); assertTrue(count >= lastCount); assertEquals(0, excs.size()); @@ -872,7 +878,9 @@ public class TestIndexWriterReader extends LuceneTestCase { r = r2; } Query q = new TermQuery(new Term("indexname", "test")); - sum += new IndexSearcher(r).search(q, 10).totalHits; + IndexSearcher searcher = newSearcher(r); + sum += searcher.search(q, 10).totalHits; + searcher.close(); } for(int i=0;i 0); assertEquals(0, excs.size()); @@ -899,7 +908,7 @@ public class TestIndexWriterReader extends LuceneTestCase { public void testExpungeDeletes() throws Throwable { Directory dir = newDirectory(); - final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newField("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); Field id = newField("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); @@ -970,11 +979,13 @@ public class TestIndexWriterReader extends LuceneTestCase { setMaxBufferedDocs(2). setReaderPooling(true). 
setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() { + @Override public void warm(IndexReader r) throws IOException { - IndexSearcher s = new IndexSearcher(r); + IndexSearcher s = newSearcher(r); TopDocs hits = s.search(new TermQuery(new Term("foo", "bar")), 10); assertEquals(20, hits.totalHits); didWarm.set(true); + s.close(); } }). setMergePolicy(newLogMergePolicy(10)) @@ -990,4 +1001,35 @@ public class TestIndexWriterReader extends LuceneTestCase { dir.close(); assertTrue(didWarm.get()); } + + public void testNoTermsIndex() throws Exception { + // Some Codecs don't honor the ReaderTermsIndexDivisor, so skip the test if + // they're picked. + HashSet illegalCodecs = new HashSet(); + illegalCodecs.add("PreFlex"); + illegalCodecs.add("SimpleText"); + + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer()).setReaderTermsIndexDivisor(-1); + // Don't proceed if picked Codec is in the list of illegal ones. + if (illegalCodecs.contains(conf.getCodecProvider().getFieldCodec("f"))) return; + + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new Field("f", "val", Store.NO, Index.ANALYZED)); + w.addDocument(doc); + IndexReader r = IndexReader.open(w, true).getSequentialSubReaders()[0]; + try { + r.termDocsEnum(null, "f", new BytesRef("val")); + fail("should have failed to seek since terms index was not loaded. Codec used " + conf.getCodecProvider().getFieldCodec("f")); + } catch (IllegalStateException e) { + // expected - we didn't load the term index + } finally { + r.close(); + w.close(); + dir.close(); + } + } + } diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java index 58681ab847f..13b668417c9 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java +++ b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java @@ -63,7 +63,7 @@ public class TestLazyBug extends LuceneTestCase { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java index 469302fdb4e..8bd31b4a73c 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java +++ b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java @@ -98,7 +98,7 @@ public class TestLazyProxSkipping extends LuceneTestCase { SegmentReader reader = getOnlySegmentReader(IndexReader.open(directory, false)); - this.searcher = new IndexSearcher(reader); + this.searcher = newSearcher(reader); } private ScoreDoc[] search() throws IOException { @@ -126,7 +126,9 @@ public class TestLazyProxSkipping extends LuceneTestCase { // test whether only the minimum amount of seeks() // are performed performTest(5); + searcher.close(); performTest(10); + searcher.close(); } public void testSeek() throws IOException { diff --git a/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java b/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java new file mode 100644 index 00000000000..fe1f29be001 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java @@ -0,0 +1,116 @@ +package 
org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Tests the maxTermFrequency statistic in FieldInvertState + */ +public class TestMaxTermFrequency extends LuceneTestCase { + Directory dir; + IndexReader reader; + /* expected maxTermFrequency values for our documents */ + ArrayList expected = new ArrayList(); + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newInOrderLogMergePolicy()); + config.setSimilarityProvider(new TestSimilarity()); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); + Document doc = new Document(); + Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(foo); + for (int i = 0; i < 100; i++) { + foo.setValue(addValue()); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + public void test() throws Exception { + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff); + } + + /** + * Makes a bunch of single-char tokens (the max freq will at most be 255). + * shuffles them around, and returns the whole list with Arrays.toString(). + * This works fine because we use lettertokenizer. + * puts the max-frequency term into expected, to be checked against the norm. 
+ */ + private String addValue() { + List terms = new ArrayList(); + int maxCeiling = _TestUtil.nextInt(random, 0, 255); + int max = 0; + for (char ch = 'a'; ch <= 'z'; ch++) { + int num = _TestUtil.nextInt(random, 0, maxCeiling); + for (int i = 0; i < num; i++) + terms.add(Character.toString(ch)); + max = Math.max(max, num); + } + expected.add(max); + Collections.shuffle(terms, random); + return Arrays.toString(terms.toArray(new String[terms.size()])); + } + + /** + * Simple similarity that encodes maxTermFrequency directly as a byte + */ + class TestSimilarity extends DefaultSimilarity { + + @Override + public byte encodeNormValue(float f) { + return (byte) f; + } + + @Override + public float decodeNormValue(byte b) { + return (float) b; + } + + @Override + public float computeNorm(String field, FieldInvertState state) { + return (float) state.getMaxTermFrequency(); + } + } +} diff --git a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java index 5fb836dd0c5..f1337e95191 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java +++ b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java @@ -97,6 +97,9 @@ public class TestMultiFields extends LuceneTestCase { for(int i=0;i<100;i++) { BytesRef term = terms.get(random.nextInt(terms.size())); + if (VERBOSE) { + System.out.println("TEST: seek to term= "+ UnicodeUtil.toHexString(term.utf8ToString())); + } DocsEnum docsEnum = terms2.docs(delDocs, term, null); assertNotNull(docsEnum); diff --git a/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java b/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java index 10dbc4f9fb0..92c3689612a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java +++ b/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java @@ -53,6 +53,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase { super(random, delegate); } + @Override public IndexInput openInput(String fileName) throws IOException { IndexInput in = super.openInput(fileName); if (fileName.endsWith(".frq")) @@ -61,6 +62,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase { } } + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -69,7 +71,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase { public void testSimpleSkip() throws IOException { Directory dir = new CountingRAMDirectory(new RAMDirectory()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")).setMergePolicy(newInOrderLogMergePolicy())); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); diff --git a/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java b/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java index 56a18c73fd6..c0c3d3c8332 100644 --- a/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java +++ b/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; +import java.util.HashSet; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.Executors; 
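[Editor's note, not part of the patch] Many hunks in this section replace direct "new IndexSearcher(r)" construction with the test framework's newSearcher(r) helper and add an explicit searcher.close() before the reader is closed. The fragment below is a minimal sketch of that pattern only, assuming the LuceneTestCase context of the surrounding tests (the random field and the newDirectory/newField/newSearcher helpers seen in these diffs); it is an illustration, not code from the patch.

    // Sketch: open a reader, search through the framework-provided searcher,
    // and close the searcher before the reader (the pattern these hunks apply).
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    doc.add(newField("body", "united", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r);   // replaces new IndexSearcher(r) in these tests
    int hits = s.search(new TermQuery(new Term("body", "united")), 10).totalHits;
    s.close();                          // searcher closed first, as added throughout the patch
    r.close();
    dir.close();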
@@ -39,6 +40,7 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.document.Field; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.NamedThreadFactory; @@ -65,7 +67,7 @@ public class TestNRTThreads extends LuceneTestCase { CodecProvider.getDefault().setDefaultFieldCodec("Standard"); } - final LineFileDocs docs = new LineFileDocs(true); + final LineFileDocs docs = new LineFileDocs(random); final File tempDir = _TestUtil.getTempDir("nrtopenfiles"); final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir)); final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); @@ -86,14 +88,16 @@ public class TestNRTThreads extends LuceneTestCase { } } - sum += new IndexSearcher(reader).search(new TermQuery(new Term("body", "united")), 10).totalHits; + IndexSearcher searcher = newSearcher(reader); + sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits; + searcher.close(); if (VERBOSE) { System.out.println("TEST: warm visited " + sum + " fields"); } } }); - + final IndexWriter writer = new IndexWriter(dir, conf); if (VERBOSE) { writer.setInfoStream(System.out); @@ -104,10 +108,12 @@ public class TestNRTThreads extends LuceneTestCase { ((ConcurrentMergeScheduler) ms).setMaxThreadCount(1); ((ConcurrentMergeScheduler) ms).setMaxMergeCount(1); } + /* LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); if (lmp.getMergeFactor() > 5) { lmp.setMergeFactor(5); } + */ final int NUM_INDEX_THREADS = 2; final int NUM_SEARCH_THREADS = 3; @@ -117,7 +123,7 @@ public class TestNRTThreads extends LuceneTestCase { final AtomicInteger addCount = new AtomicInteger(); final AtomicInteger delCount = new AtomicInteger(); - final List delIDs = Collections.synchronizedList(new ArrayList()); + final Set delIDs = Collections.synchronizedSet(new HashSet()); final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000; Thread[] threads = new Thread[NUM_INDEX_THREADS]; @@ -132,22 +138,29 @@ public class TestNRTThreads extends LuceneTestCase { if (doc == null) { break; } + final String addedField; + if (random.nextBoolean()) { + addedField = "extra" + random.nextInt(10); + doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED)); + } else { + addedField = null; + } if (random.nextBoolean()) { if (VERBOSE) { - //System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id")); + System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id")); } writer.addDocument(doc); } else { // we use update but it never replaces a // prior doc if (VERBOSE) { - //System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id")); + System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id")); } writer.updateDocument(new Term("id", doc.get("id")), doc); } if (random.nextInt(5) == 3) { if (VERBOSE) { - //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id")); + System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id")); } toDeleteIDs.add(doc.get("id")); } @@ -156,6 +169,9 @@ public class TestNRTThreads extends LuceneTestCase { System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " 
deletes"); } for(String id : toDeleteIDs) { + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); + } writer.deleteDocuments(new Term("id", id)); } final int count = delCount.addAndGet(toDeleteIDs.size()); @@ -166,6 +182,9 @@ public class TestNRTThreads extends LuceneTestCase { toDeleteIDs.clear(); } addCount.getAndIncrement(); + if (addedField != null) { + doc.removeField(addedField); + } } catch (Exception exc) { System.out.println(Thread.currentThread().getName() + ": hit exc"); exc.printStackTrace(); @@ -189,7 +208,7 @@ public class TestNRTThreads extends LuceneTestCase { // let index build up a bit Thread.sleep(100); - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); boolean any = false; // silly starting guess: @@ -222,7 +241,7 @@ public class TestNRTThreads extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: now open"); } - r = IndexReader.open(writer); + r = IndexReader.open(writer, true); } if (VERBOSE) { System.out.println("TEST: got new reader=" + r); @@ -335,20 +354,39 @@ public class TestNRTThreads extends LuceneTestCase { } final IndexReader r2 = writer.getReader(); - final IndexSearcher s = new IndexSearcher(r2); + final IndexSearcher s = newSearcher(r2); + boolean doFail = false; for(String id : delIDs) { final TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1); if (hits.totalHits != 0) { - fail("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc); + System.out.println("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc); + doFail = true; } } + + final int endID = Integer.parseInt(docs.nextDoc().get("id")); + for(int id=0;id norms; @@ -150,7 +152,7 @@ public class TestNorms extends LuceneTestCase { private void createIndex(Random random, Directory dir) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne).setMergePolicy(newInOrderLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(true); @@ -168,8 +170,9 @@ public class TestNorms extends LuceneTestCase { //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms.set(i, Float.valueOf(newNorm)); modifiedNorms.set(k, Float.valueOf(origNorm)); - ir.setNorm(i, "f"+1, newNorm); - ir.setNorm(k, "f"+1, origNorm); + Similarity sim = new DefaultSimilarity().get("f"+1); + ir.setNorm(i, "f"+1, sim.encodeNormValue(newNorm)); + ir.setNorm(k, "f"+1, sim.encodeNormValue(origNorm)); } ir.close(); } @@ -183,7 +186,7 @@ public class TestNorms extends LuceneTestCase { assertEquals("number of norms mismatches",numDocNorms,b.length); ArrayList storedNorms = (i==1 ? 
modifiedNorms : norms); for (int j = 0; j < b.length; j++) { - float norm = similarityOne.decodeNormValue(b[j]); + float norm = similarityOne.get(field).decodeNormValue(b[j]); float norm1 = storedNorms.get(j).floatValue(); assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001); } @@ -194,7 +197,7 @@ public class TestNorms extends LuceneTestCase { private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) - .setMaxBufferedDocs(5).setSimilarity(similarityOne)); + .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne).setMergePolicy(newInOrderLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy(); lmp.setMergeFactor(3); lmp.setUseCompoundFile(compound); @@ -207,7 +210,7 @@ public class TestNorms extends LuceneTestCase { // create the next document private Document newDoc() { Document d = new Document(); - float boost = nextNorm(); + float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed for (int i = 0; i < 10; i++) { Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED); f.setBoost(boost); @@ -217,10 +220,11 @@ public class TestNorms extends LuceneTestCase { } // return unique norm values that are unchanged by encoding/decoding - private float nextNorm() { + private float nextNorm(String fname) { float norm = lastNorm + normDelta; + Similarity similarity = similarityOne.get(fname); do { - float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm)); + float norm1 = similarity.decodeNormValue(similarity.encodeNormValue(norm)); if (norm1 > lastNorm) { //System.out.println(norm1+" > "+lastNorm); norm = norm1; @@ -236,4 +240,52 @@ public class TestNorms extends LuceneTestCase { return norm; } + class CustomNormEncodingSimilarity extends DefaultSimilarity { + @Override + public byte encodeNormValue(float f) { + return (byte) f; + } + + @Override + public float decodeNormValue(byte b) { + return (float) b; + } + + @Override + public float computeNorm(String field, FieldInvertState state) { + return (float) state.getLength(); + } + } + + // LUCENE-1260 + public void testCustomEncoder() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + config.setSimilarityProvider(new CustomNormEncodingSimilarity()); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); + Document doc = new Document(); + Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(foo); + doc.add(bar); + + for (int i = 0; i < 100; i++) { + bar.setValue("singleton"); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + writer.close(); + + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(0, fooNorms[i]); + + byte barNorms[] = MultiNorms.norms(reader, "bar"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(1, barNorms[i]); + + reader.close(); + dir.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java b/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java new file mode 100644 index 00000000000..bc6b713db69 --- /dev/null +++ 
b/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java @@ -0,0 +1,303 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; + +public class TestOmitNorms extends LuceneTestCase { + // Tests whether the DocumentWriter correctly enable the + // omitNorms bit in the FieldInfo + public void testOmitNorms() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + f2.setOmitNorms(true); + d.add(f2); + + writer.addDocument(d); + writer.optimize(); + // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger + // keep things constant + d = new Document(); + + // Reverse + f1.setOmitNorms(true); + d.add(f1); + + f2.setOmitNorms(false); + d.add(f2); + + writer.addDocument(d); + + // force merge + writer.optimize(); + // flush + writer.close(); + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + // Tests whether merging of docs that have different + // omitNorms for the same field works + public void testMixedMerge() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter( + ram, + newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). + setMaxBufferedDocs(3). 
+ setMergePolicy(newLogMergePolicy(2)) + ); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + f2.setOmitNorms(true); + d.add(f2); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + // now we add another document which has norms for field f2 and not for f1 and verify if the SegmentMerger + // keep things constant + d = new Document(); + + // Reverese + f1.setOmitNorms(true); + d.add(f1); + + f2.setOmitNorms(false); + d.add(f2); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + // force merge + writer.optimize(); + // flush + writer.close(); + + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + // Make sure first adding docs that do not omitNorms for + // field X, then adding docs that do omitNorms for that same + // field, + public void testMixedRAM() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter( + ram, + newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). + setMaxBufferedDocs(10). + setMergePolicy(newLogMergePolicy(2)) + ); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + d.add(f2); + + for (int i = 0; i < 5; i++) { + writer.addDocument(d); + } + + f2.setOmitNorms(true); + + for (int i = 0; i < 20; i++) { + writer.addDocument(d); + } + + // force merge + writer.optimize(); + + // flush + writer.close(); + + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + private void assertNoNrm(Directory dir) throws Throwable { + final String[] files = dir.listAll(); + for (int i = 0; i < files.length; i++) { + assertFalse(files[i].endsWith(".nrm")); + } + } + + // Verifies no *.nrm exists when all fields omit norms: + public void testNoNrmFile() throws Throwable { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy())); + writer.setInfoStream(VERBOSE ? 
System.out : null); + LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); + lmp.setMergeFactor(2); + lmp.setUseCompoundFile(false); + Document d = new Document(); + + Field f1 = newField("f1", "This field has no norms", Field.Store.NO, Field.Index.ANALYZED); + f1.setOmitNorms(true); + d.add(f1); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + writer.commit(); + + assertNoNrm(ram); + + // force merge + writer.optimize(); + // flush + writer.close(); + + assertNoNrm(ram); + _TestUtil.checkIndex(ram); + ram.close(); + } + + /** + * Tests various combinations of omitNorms=true/false, the field not existing at all, + * ensuring that only omitNorms is 'viral'. + * Internally checks that MultiNorms.norms() is consistent (returns the same bytes) + * as the optimized equivalent. + */ + public void testOmitNormsCombos() throws IOException { + // indexed with norms + Field norms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED); + // indexed without norms + Field noNorms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); + // not indexed, but stored + Field noIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); + // not indexed but stored, omitNorms is set + Field noNormsNoIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); + noNormsNoIndex.setOmitNorms(true); + // not indexed nor stored (doesnt exist at all, we index a different field instead) + Field emptyNorms = new Field("bar", "a", Field.Store.YES, Field.Index.ANALYZED); + + assertNotNull(getNorms("foo", norms, norms)); + assertNull(getNorms("foo", norms, noNorms)); + assertNotNull(getNorms("foo", norms, noIndex)); + assertNotNull(getNorms("foo", norms, noNormsNoIndex)); + assertNotNull(getNorms("foo", norms, emptyNorms)); + assertNull(getNorms("foo", noNorms, noNorms)); + assertNull(getNorms("foo", noNorms, noIndex)); + assertNull(getNorms("foo", noNorms, noNormsNoIndex)); + assertNull(getNorms("foo", noNorms, emptyNorms)); + assertNull(getNorms("foo", noIndex, noIndex)); + assertNull(getNorms("foo", noIndex, noNormsNoIndex)); + assertNull(getNorms("foo", noIndex, emptyNorms)); + assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex)); + assertNull(getNorms("foo", noNormsNoIndex, emptyNorms)); + assertNull(getNorms("foo", emptyNorms, emptyNorms)); + } + + /** + * Indexes at least 1 document with f1, and at least 1 document with f2. + * returns the norms for "field". + */ + static byte[] getNorms(String field, Field f1, Field f2) throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()); + RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc); + + // add f1 + Document d = new Document(); + d.add(f1); + riw.addDocument(d); + + // add f2 + d = new Document(); + d.add(f2); + riw.addDocument(d); + + // add a mix of f1's and f2's + int numExtraDocs = _TestUtil.nextInt(random, 1, 1000); + for (int i = 0; i < numExtraDocs; i++) { + d = new Document(); + d.add(random.nextBoolean() ? f1 : f2); + riw.addDocument(d); + } + + IndexReader ir1 = riw.getReader(); + byte[] norms1 = MultiNorms.norms(ir1, field); + + // optimize and validate MultiNorms against single segment. 
+ riw.optimize(); + IndexReader ir2 = riw.getReader(); + byte[] norms2 = ir2.getSequentialSubReaders()[0].norms(field); + + assertArrayEquals(norms1, norms2); + ir1.close(); + ir2.close(); + riw.close(); + dir.close(); + return norms1; + } +} diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java index 49c72e6c029..3b26e4e7cd9 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; @@ -34,13 +35,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation; public class TestOmitTf extends LuceneTestCase { - public static class SimpleSimilarity extends Similarity { - @Override public float lengthNorm(String field, int numTerms) { return 1.0f; } - @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public static class SimpleSimilarity extends Similarity implements SimilarityProvider { + @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } - @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } @Override public IDFExplanation idfExplain(Collection terms, IndexSearcher searcher) throws IOException { return new IDFExplanation() { @Override @@ -53,6 +52,11 @@ public class TestOmitTf extends LuceneTestCase { } }; } + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public float coord(int overlap, int maxOverlap) { return 1.0f; } + public Similarity get(String field) { + return this; + } } // Tests whether the DocumentWriter correctly enable the @@ -215,7 +219,7 @@ public class TestOmitTf extends LuceneTestCase { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(); IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3)); + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setMergeFactor(2); lmp.setUseCompoundFile(false); @@ -250,9 +254,10 @@ public class TestOmitTf extends LuceneTestCase { dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). setMaxBufferedDocs(2). - setSimilarity(new SimpleSimilarity()). - setMergePolicy(newLogMergePolicy(2)) + setSimilarityProvider(new SimpleSimilarity()). + setMergePolicy(newInOrderLogMergePolicy(2)) ); + writer.setInfoStream(VERBOSE ? 
System.out : null); StringBuilder sb = new StringBuilder(265); String term = "term"; @@ -280,7 +285,7 @@ public class TestOmitTf extends LuceneTestCase { * Verify the index */ IndexSearcher searcher = new IndexSearcher(dir, true); - searcher.setSimilarity(new SimpleSimilarity()); + searcher.setSimilarityProvider(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); @@ -330,7 +335,7 @@ public class TestOmitTf extends LuceneTestCase { public final void collect(int doc) throws IOException { //System.out.println("Q2: Doc=" + doc + " score=" + score); float score = scorer.score(); - assertTrue(score==1.0f+doc); + assertEquals(1.0f+doc, score, 0.00001f); super.collect(doc); } }); @@ -414,8 +419,8 @@ public class TestOmitTf extends LuceneTestCase { public static int getSum() { return sum; } @Override - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { diff --git a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java index 319b6ce9782..fdee60a05aa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java @@ -47,7 +47,9 @@ public class TestParallelReader extends LuceneTestCase { @Override public void tearDown() throws Exception { single.getIndexReader().close(); + single.close(); parallel.getIndexReader().close(); + parallel.close(); dir.close(); dir1.close(); dir2.close(); @@ -147,7 +149,8 @@ public class TestParallelReader extends LuceneTestCase { assertTrue(pr.isCurrent()); IndexReader modifier = IndexReader.open(dir1, false); - modifier.setNorm(0, "f1", 100); + SimilarityProvider sim = new DefaultSimilarity(); + modifier.setNorm(0, "f1", sim.get("f1").encodeNormValue(100f)); modifier.close(); // one of the two IndexReaders which ParallelReader is using @@ -155,7 +158,7 @@ public class TestParallelReader extends LuceneTestCase { assertFalse(pr.isCurrent()); modifier = IndexReader.open(dir2, false); - modifier.setNorm(0, "f3", 100); + modifier.setNorm(0, "f3", sim.get("f3").encodeNormValue(100f)); modifier.close(); // now both are not current anymore @@ -266,7 +269,7 @@ public class TestParallelReader extends LuceneTestCase { ParallelReader pr = new ParallelReader(); pr.add(IndexReader.open(dir1, false)); pr.add(IndexReader.open(dir2, false)); - return new IndexSearcher(pr); + return newSearcher(pr); } private Directory getDir1(Random random) throws IOException { diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java index ac12cf28743..5841c79fc10 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java @@ -163,7 +163,8 @@ public class TestPayloads extends LuceneTestCase { PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) - .setOpenMode(OpenMode.CREATE)); + .setOpenMode(OpenMode.CREATE) + .setMergePolicy(newInOrderLogMergePolicy())); // should be in sync with value in TermInfosWriter final int skipInterval = 16; diff --git a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java index 
d59c0259d13..4ed85a91e87 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java @@ -123,7 +123,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider(provider); iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); - ((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); + //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); IndexWriter writer = newWriter(dir, iwconf); addDocs(writer, 10); @@ -143,8 +143,8 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) .setOpenMode(OpenMode.APPEND).setCodecProvider(provider); - ((LogMergePolicy) iwconf.getMergePolicy()).setUseCompoundFile(false); - ((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); + //((LogMergePolicy) iwconf.getMergePolicy()).setUseCompoundFile(false); + //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10); iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); provider = new MockCodecProvider2(); // uses standard for field content @@ -227,7 +227,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { } IndexReader reader = IndexReader.open(dir, null, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, codecs); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); TopDocs search = searcher.search(new TermQuery(t), num + 10); assertEquals(num, search.totalHits); searcher.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java index 294f11058ae..c7312b45ab8 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java @@ -46,6 +46,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { RangeMergePolicy fsmp = new RangeMergePolicy(false); iwc.setMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); + writer.setInfoStream(VERBOSE ? 
System.out : null); for (int x = 0; x < 5; x++) { writer.addDocument(TestIndexWriterReader.createDocument(x, "1", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); @@ -73,12 +74,12 @@ public class TestPerSegmentDeletes extends LuceneTestCase { // flushing without applying deletes means // there will still be deletes in the segment infos writer.flush(false, false); - assertTrue(writer.bufferedDeletes.any()); + assertTrue(writer.bufferedDeletesStream.any()); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.getReader(); - assertFalse(writer.bufferedDeletes.any()); + assertFalse(writer.bufferedDeletesStream.any()); r1.close(); // delete id:2 from the first segment @@ -256,6 +257,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { @Override public void close() {} + @Override public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { MergeSpecification ms = new MergeSpecification(); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java new file mode 100644 index 00000000000..d363edb777d --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java @@ -0,0 +1,49 @@ +package org.apache.lucene.index; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestSegmentInfo extends LuceneTestCase { + + public void testSizeInBytesCache() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()); + IndexWriter writer = new IndexWriter(dir, conf); + writer.setInfoStream(VERBOSE ? 
System.out : null); + Document doc = new Document(); + doc.add(new Field("a", "value", Store.YES, Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + SegmentInfo si = sis.info(0); + long sizeInBytesNoStore = si.sizeInBytes(false); + long sizeInBytesWithStore = si.sizeInBytes(true); + assertTrue("sizeInBytesNoStore=" + sizeInBytesNoStore + " sizeInBytesWithStore=" + sizeInBytesWithStore, sizeInBytesWithStore > sizeInBytesNoStore); + dir.close(); + } + +} diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java index 5e656ae9eb1..d284f11dfa0 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -27,7 +27,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; public class TestSegmentReader extends LuceneTestCase { @@ -180,15 +179,9 @@ public class TestSegmentReader extends LuceneTestCase { assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms()); assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name())); if (!reader.hasNorms(f.name())) { - // test for fake norms of 1.0 or null depending on the flag + // test for norms of null byte [] norms = MultiNorms.norms(reader, f.name()); - byte norm1 = Similarity.getDefault().encodeNormValue(1.0f); assertNull(norms); - norms = new byte[reader.maxDoc()]; - MultiNorms.norms(reader, f.name(),norms, 0); - for (int j=0; j docs = new HashMap(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB( - 0.1).setMaxBufferedDocs(maxBufferedDocs)); + 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); w.commit(); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); @@ -206,7 +206,7 @@ public class TestStressIndexing2 extends LuceneTestCase { IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE) .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) - .setReaderPooling(doReaderPooling)); + .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? 
System.out : null); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); @@ -248,7 +248,7 @@ public class TestStressIndexing2 extends LuceneTestCase { public static void indexSerial(Random random, Map docs, Directory dir) throws IOException { - IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); // index all docs in a single thread Iterator iter = docs.values().iterator(); diff --git a/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip b/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip index 02eac457956..bfbe17911ce 100644 Binary files a/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip and b/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip differ diff --git a/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip b/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip index 47af2bfe3a1..c456941eac1 100644 Binary files a/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip and b/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip differ diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 1883958eb03..fe784a53b47 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -53,6 +53,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; @@ -87,13 +88,19 @@ public class TestDocValuesIndexing extends LuceneTestCase { provider = new DocValuesCodecProvider(); provider.copyFrom(CodecProvider.getDefault()); } + + private Directory newDirectory2() throws IOException { + MockDirectoryWrapper newDirectory = newDirectory(); + newDirectory.setCheckIndexOnClose(false); + return newDirectory; + } /* * Simple test case to show how to use the API */ public void testDocValuesSimple() throws CorruptIndexException, IOException, ParseException { - Directory dir = newDirectory(); + Directory dir = newDirectory2(); IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); @@ -175,7 +182,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { Type second = values.get(1); String msg = "[first=" + first.name() + ", second=" + second.name() + "]"; // index first index - Directory d_1 = newDirectory(); + Directory d_1 = newDirectory2(); IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random.nextBoolean())); indexValues(w_1, valuesPerIndex, first, values, false, 7); w_1.commit(); @@ -183,17 +190,17 @@ public class TestDocValuesIndexing extends LuceneTestCase { _TestUtil.checkIndex(d_1, w_1.getConfig().getCodecProvider()); // index second index - Directory d_2 = newDirectory(); + Directory d_2 = newDirectory2(); IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random.nextBoolean())); indexValues(w_2, valuesPerIndex, second, values, false, 7); w_2.commit(); assertEquals(valuesPerIndex, 
w_2.maxDoc()); _TestUtil.checkIndex(d_2, w_2.getConfig().getCodecProvider()); - Directory target = newDirectory(); + Directory target = newDirectory2(); IndexWriter w = new IndexWriter(target, writerConfig(random.nextBoolean())); - IndexReader r_1 = IndexReader.open(w_1); - IndexReader r_2 = IndexReader.open(w_2); + IndexReader r_1 = IndexReader.open(w_1, true); + IndexReader r_2 = IndexReader.open(w_2, true); if (random.nextBoolean()) { w.addIndexes(d_1, d_2); } else { @@ -207,7 +214,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { // check values - IndexReader merged = IndexReader.open(w); + IndexReader merged = IndexReader.open(w, true); DocValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name())); DocValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name())); DocValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); @@ -243,12 +250,16 @@ public class TestDocValuesIndexing extends LuceneTestCase { private IndexWriterConfig writerConfig(boolean useCompoundFile) { final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + cfg.setMergePolicy(newLogMergePolicy(random)); MergePolicy mergePolicy = cfg.getMergePolicy(); if (mergePolicy instanceof LogMergePolicy) { - ((LogMergePolicy) mergePolicy).setUseCompoundFile(useCompoundFile); + LogMergePolicy policy = ((LogMergePolicy) mergePolicy); + policy.setUseCompoundFile(useCompoundFile); + policy.setRequireContiguousMerge(true); } else if (useCompoundFile) { LogMergePolicy policy = new LogDocMergePolicy(); policy.setUseCompoundFile(useCompoundFile); + policy.setRequireContiguousMerge(true); cfg.setMergePolicy(policy); } cfg.setCodecProvider(provider); @@ -257,7 +268,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) throws IOException { - Directory d = newDirectory(); + Directory d = newDirectory2(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 179 + random.nextInt(151); final List numVariantList = new ArrayList(NUMERICS); @@ -268,7 +279,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { OpenBitSet deleted = indexValues(w, numValues, val, numVariantList, withDeletions, 7); List closeables = new ArrayList(); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; switch (val) { @@ -349,7 +360,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) throws CorruptIndexException, LockObtainFailedException, IOException { - final Directory d = newDirectory(); + final Directory d = newDirectory2(); IndexWriter w = new IndexWriter(d, cfg); final List byteVariantList = new ArrayList(BYTES); // run in random order to test if fill works correctly during merges @@ -361,7 +372,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { int bytesSize = 7 + random.nextInt(128); OpenBitSet deleted = indexValues(w, numValues, byteIndexValue, byteVariantList, withDeletions, bytesSize); - final IndexReader r = IndexReader.open(w); + final IndexReader r = IndexReader.open(w, withDeletions); assertEquals(0, r.numDeletedDocs()); final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; @@ -460,7 +471,6 @@ public class TestDocValuesIndexing extends 
LuceneTestCase { private DocValues getDocValues(IndexReader reader, String field) throws IOException { boolean optimized = reader.isOptimized(); - reader.isCurrent(); Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields.getFields(reader); switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java index 585df3a70c5..4803a58a339 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -1140,12 +1140,13 @@ public class TestQueryParser extends LuceneTestCase { Document doc = new Document(); doc.add(newField("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED)); w.addDocument(doc); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); w.close(); - IndexSearcher s = new IndexSearcher(r); + IndexSearcher s = newSearcher(r); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "f", a); Query q = qp.parse("\"wizard of ozzy\""); assertEquals(1, s.search(q, 1).totalHits); + s.close(); r.close(); dir.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java index 332ba958698..6d21b13185c 100644 --- a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java @@ -124,14 +124,14 @@ public class BaseTestRangeFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, index.index, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()) - .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy())); + _TestUtil.reduceOpenFiles(writer.w); + while(true) { int minCount = 0; int maxCount = 0; - _TestUtil.reduceOpenFiles(writer.w); - for (int d = minId; d <= maxId; d++) { idField.setValue(pad(d)); int r = index.allowNegativeRandomInts ? random.nextInt() : random diff --git a/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java b/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java index 80df5720f0f..41872acd8bd 100644 --- a/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java +++ b/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java @@ -20,7 +20,8 @@ package org.apache.lucene.search; import java.io.IOException; import junit.framework.Assert; -import org.apache.lucene.index.IndexReader; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; /** * A unit test helper class to test when the filter is getting cached and when it is not. 
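
The TestQueryParser and BaseTestRangeFilter hunks above show a pattern that recurs throughout this patch: near-real-time readers are opened from the writer with an explicit applyAllDeletes flag, searchers come from LuceneTestCase.newSearcher() instead of new IndexSearcher(reader), and every searcher is closed before its reader and directory. A rough sketch of that test shape, reusing only helpers that appear in the surrounding hunks (random, newField, newSearcher, writer.w); the field name and text are taken from the TestQueryParser hunk and are purely illustrative:

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    doc.add(newField("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    IndexReader r = IndexReader.open(writer.w, true); // NRT reader, applyAllDeletes=true
    writer.close();

    IndexSearcher s = newSearcher(r);                 // may wrap the search; must be closed
    try {
      assertEquals(1, s.search(new TermQuery(new Term("f", "ozzy")), 1).totalHits);
    } finally {
      s.close();                                      // searcher first ...
      r.close();                                      // ... then reader ...
      dir.close();                                    // ... then directory
    }
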
@@ -41,10 +42,10 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter { } @Override - public synchronized DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { final int saveMissCount = missCount; - DocIdSet docIdSet = super.getDocIdSet(reader); + DocIdSet docIdSet = super.getDocIdSet(context); if (shouldHaveCache) { Assert.assertEquals("Cache should have data ", saveMissCount, missCount); diff --git a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java index daa24c91658..db54970334d 100644 --- a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -19,8 +19,9 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.util.PriorityQueue; /** @@ -42,7 +43,7 @@ final class JustCompileSearch { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -125,7 +126,7 @@ final class JustCompileSearch { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -152,7 +153,7 @@ final class JustCompileSearch { // still added here in case someone will add abstract methods in the future. 
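
The Collector and FieldComparator hunks above replace setNextReader(IndexReader reader, int docBase) with setNextReader(AtomicReaderContext context); the per-segment docBase is now read from the context, as TestDocBoost does further down. A minimal collector under the new signature (class name and body are illustrative, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Scorer;

    // Collects global doc ids by adding the per-segment docBase taken from the context.
    class DocIdCollector extends Collector {
      private int docBase;

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        // scores not needed for this sketch
      }

      @Override
      public void collect(int doc) throws IOException {
        int globalDoc = docBase + doc;   // doc is segment-relative
        // ... record globalDoc ...
      }

      @Override
      public void setNextReader(AtomicReaderContext context) throws IOException {
        docBase = context.docBase;       // was the second argument before this change
      }

      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }
    }
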
@Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { return null; } } @@ -208,12 +209,12 @@ final class JustCompileSearch { static final class JustCompileScorer extends Scorer { - protected JustCompileScorer(Similarity similarity) { - super(similarity); + protected JustCompileScorer(Weight weight) { + super(weight); } @Override - protected boolean score(Collector collector, int max, int firstDocID) + public boolean score(Collector collector, int max, int firstDocID) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -241,23 +242,13 @@ final class JustCompileSearch { static final class JustCompileSimilarity extends Similarity { - @Override - public float coord(int overlap, int maxOverlap) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - @Override public float idf(int docFreq, int numDocs) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public float lengthNorm(String fieldName, int numTokens) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public float queryNorm(float sumOfSquaredWeights) { + public float computeNorm(String fieldName, FieldInvertState state) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -269,19 +260,33 @@ final class JustCompileSearch { @Override public float tf(float freq) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + } + + static final class JustCompileSimilarityProvider implements SimilarityProvider { + + public float queryNorm(float sumOfSquaredWeights) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + public float coord(int overlap, int maxOverlap) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + public Similarity get(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSpanFilter extends SpanFilter { @Override - public SpanFilterResult bitSpans(IndexReader reader) throws IOException { + public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { return null; } } @@ -298,7 +303,7 @@ final class JustCompileSearch { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -333,7 +338,7 @@ final class JustCompileSearch { static final class JustCompileWeight extends Weight { @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -358,7 +363,7 @@ final class JustCompileSearch { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) + public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/src/test/org/apache/lucene/search/MockFilter.java b/lucene/src/test/org/apache/lucene/search/MockFilter.java index 36b4247fa91..1152db0f3d8 100644 --- 
a/lucene/src/test/org/apache/lucene/search/MockFilter.java +++ b/lucene/src/test/org/apache/lucene/search/MockFilter.java @@ -17,7 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.DocIdBitSet; import java.util.BitSet; @@ -25,7 +25,7 @@ public class MockFilter extends Filter { private boolean wasCalled; @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { wasCalled = true; return new DocIdBitSet(new BitSet()); } diff --git a/lucene/src/test/org/apache/lucene/search/MultiCollectorTest.java b/lucene/src/test/org/apache/lucene/search/MultiCollectorTest.java index ae988c04ad2..a87135214ea 100644 --- a/lucene/src/test/org/apache/lucene/search/MultiCollectorTest.java +++ b/lucene/src/test/org/apache/lucene/search/MultiCollectorTest.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.LuceneTestCase; @@ -46,7 +46,7 @@ public class MultiCollectorTest extends LuceneTestCase { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { setNextReaderCalled = true; } @@ -73,7 +73,7 @@ public class MultiCollectorTest extends LuceneTestCase { assertTrue(c instanceof MultiCollector); assertTrue(c.acceptsDocsOutOfOrder()); c.collect(1); - c.setNextReader(null, 0); + c.setNextReader(null); c.setScorer(null); } @@ -95,7 +95,7 @@ public class MultiCollectorTest extends LuceneTestCase { Collector c = MultiCollector.wrap(dcs); assertTrue(c.acceptsDocsOutOfOrder()); c.collect(1); - c.setNextReader(null, 0); + c.setNextReader(null); c.setScorer(null); for (DummyCollector dc : dcs) { diff --git a/lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java b/lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java index bd1df4e3ee0..a33a6c178a2 100644 --- a/lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java +++ b/lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java @@ -17,7 +17,7 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.DocIdBitSet; import java.util.BitSet; @@ -31,8 +31,8 @@ public class SingleDocTestFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - BitSet bits = new BitSet(reader.maxDoc()); + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + BitSet bits = new BitSet(context.reader.maxDoc()); bits.set(doc); return new DocIdBitSet(bits); } diff --git a/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java b/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java index 58e4e4d5b03..410aadc4206 100644 --- a/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java @@ -40,6 +40,7 @@ public class TestAutomatonQuery extends LuceneTestCase { private final String FN = "field"; + @Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); @@ -61,10 +62,11 @@ public class TestAutomatonQuery extends LuceneTestCase { + " with numbers 1234 5678.9 and letter b"); writer.addDocument(doc); reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } + @Override public void tearDown() throws Exception { searcher.close(); reader.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java b/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java index 29bec6c066a..53af0ad3a62 100644 --- a/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java +++ b/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java @@ -41,6 +41,7 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase { private final String FN = "field"; + @Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); @@ -81,10 +82,11 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase { field.setValue("\uFFFD\uFFFD"); writer.addDocument(doc); reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } + @Override public void tearDown() throws Exception { searcher.close(); reader.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java index f1cb84a51e5..13184b0d77a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java @@ -54,7 +54,7 @@ public class TestBoolean2 extends LuceneTestCase { @BeforeClass public static void beforeClass() throws Exception { directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); @@ -92,7 +92,7 @@ public class TestBoolean2 extends LuceneTestCase { w.addDocument(doc); } reader = w.getReader(); - bigSearcher = new IndexSearcher(reader); + bigSearcher = newSearcher(reader); w.close(); } @@ -208,9 +208,9 @@ public class TestBoolean2 extends LuceneTestCase { public void testQueries10() throws Exception { String 
queryText = "+w3 +xx +w2 zz"; int[] expDocNrs = {2, 3}; - Similarity oldSimilarity = searcher.getSimilarity(); + SimilarityProvider oldSimilarity = searcher.getSimilarityProvider(); try { - searcher.setSimilarity(new DefaultSimilarity(){ + searcher.setSimilarityProvider(new DefaultSimilarity(){ @Override public float coord(int overlap, int maxOverlap) { return overlap / ((float)maxOverlap - 1); @@ -218,7 +218,7 @@ public class TestBoolean2 extends LuceneTestCase { }); queriesTest(queryText, expDocNrs); } finally { - searcher.setSimilarity(oldSimilarity); + searcher.setSimilarityProvider(oldSimilarity); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java b/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java index 55b067b6b78..bd53696d570 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java @@ -65,7 +65,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase { } r = w.getReader(); - s = new IndexSearcher(r); + s = newSearcher(r); w.close(); //System.out.println("Set up " + getName()); } diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanOr.java b/lucene/src/test/org/apache/lucene/search/TestBooleanOr.java index 850b3a8ff32..169cae15420 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanOr.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanOr.java @@ -154,7 +154,7 @@ public class TestBooleanOr extends LuceneTestCase { reader = writer.getReader(); // - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java index 4fa2a54440e..50b95728c99 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -17,13 +17,21 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.index.Term; +import org.apache.lucene.util.NamedThreadFactory; public class TestBooleanQuery extends LuceneTestCase { @@ -65,7 +73,7 @@ public class TestBooleanQuery extends LuceneTestCase { w.addDocument(doc); IndexReader r = w.getReader(); - IndexSearcher s = new IndexSearcher(r); + IndexSearcher s = newSearcher(r); BooleanQuery q = new BooleanQuery(); q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); @@ -112,9 +120,50 @@ public class TestBooleanQuery extends LuceneTestCase { dmq.add(pq); assertEquals(1, s.search(dmq, 10).totalHits); + s.close(); r.close(); w.close(); dir.close(); } - + + public void testDeMorgan() throws Exception { + Directory dir1 = newDirectory(); + RandomIndexWriter iw1 = new RandomIndexWriter(random, dir1); + Document doc1 = new Document(); + doc1.add(newField("field", "foo bar", Field.Index.ANALYZED)); + iw1.addDocument(doc1); + IndexReader reader1 = iw1.getReader(); + iw1.close(); + + Directory dir2 = newDirectory(); + RandomIndexWriter iw2 = new RandomIndexWriter(random, dir2); + Document doc2 = new Document(); + doc2.add(newField("field", "foo baz", Field.Index.ANALYZED)); + iw2.addDocument(doc2); + IndexReader reader2 = iw2.getReader(); + iw2.close(); + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + + MultiReader multireader = new MultiReader(reader1, reader2); + IndexSearcher searcher = new IndexSearcher(multireader); + assertEquals(0, searcher.search(qp.parse("+foo -ba*"), 10).totalHits); + + final ExecutorService es = Executors.newCachedThreadPool(new NamedThreadFactory("NRT search threads")); + searcher = new IndexSearcher(multireader, es); + if (VERBOSE) + System.out.println("rewritten form: " + searcher.rewrite(qp.parse("+foo -ba*"))); + assertEquals(0, searcher.search(qp.parse("+foo -ba*"), 10).totalHits); + es.shutdown(); + es.awaitTermination(1, TimeUnit.SECONDS); + + multireader.close(); + reader1.close(); + reader2.close(); + dir1.close(); + dir2.close(); + } } + + diff --git a/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java index 297aabe3f4c..89cad9ff71a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -25,6 +25,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery.BooleanWeight; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -55,9 +56,10 @@ public class TestBooleanScorer extends LuceneTestCase query.add(booleanQuery1, 
BooleanClause.Occur.MUST); query.add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); - IndexSearcher indexSearcher = new IndexSearcher(ir); + IndexSearcher indexSearcher = newSearcher(ir); ScoreDoc[] hits = indexSearcher.search(query, null, 1000).scoreDocs; assertEquals("Number of matched documents", 2, hits.length); + indexSearcher.close(); ir.close(); directory.close(); } @@ -68,8 +70,14 @@ public class TestBooleanScorer extends LuceneTestCase // 'more' variable to work properly, and this test ensures that if the logic // changes, we have a test to back it up. - Similarity sim = Similarity.getDefault(); - Scorer[] scorers = new Scorer[] {new Scorer(sim) { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + writer.commit(); + IndexReader ir = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(ir); + BooleanWeight weight = (BooleanWeight) new BooleanQuery().createWeight(searcher); + Scorer[] scorers = new Scorer[] {new Scorer(weight) { private int doc = -1; @Override public float score() throws IOException { return 0; } @Override public int docID() { return doc; } @@ -83,10 +91,15 @@ public class TestBooleanScorer extends LuceneTestCase } }}; - BooleanScorer bs = new BooleanScorer(null, sim, 1, Arrays.asList(scorers), null, scorers.length); + + BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.asList(scorers), null, scorers.length); assertEquals("should have received 3000", 3000, bs.nextDoc()); assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc()); + searcher.close(); + ir.close(); + directory.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java index ab58370934f..3424658ed27 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java @@ -47,8 +47,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { // flipping a coin) may give us a newly opened reader, // but we use .reopen on this reader below and expect to // (must) get an NRT reader: - IndexReader reader = IndexReader.open(writer.w); - IndexSearcher searcher = new IndexSearcher(reader); + IndexReader reader = IndexReader.open(writer.w, true); + IndexSearcher searcher = newSearcher(reader); // add a doc, refresh the reader, and check that its there Document doc = new Document(); @@ -56,7 +56,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); assertEquals("Should find a hit...", 1, docs.totalHits); @@ -76,7 +77,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -90,7 +92,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new 
MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); @@ -108,7 +111,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { // that had no new deletions reader = refreshReader(reader); assertTrue(reader != oldReader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); int missCount = filter.missCount; docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -118,7 +122,8 @@ public class TestCachingSpanFilter extends LuceneTestCase { writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -132,6 +137,7 @@ public class TestCachingSpanFilter extends LuceneTestCase { // entry: assertTrue(oldReader != null); + searcher.close(); writer.close(); reader.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java index f56e440b0cd..357b3df1017 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.SlowMultiReaderWrapper; @@ -39,21 +40,21 @@ public class TestCachingWrapperFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir); writer.close(); - IndexReader reader = IndexReader.open(dir, true); - + IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(dir, true)); + AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext(); MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called - cacher.getDocIdSet(reader); + cacher.getDocIdSet(context); assertTrue("first time", filter.wasCalled()); // make sure no exception if cache is holding the wrong docIdSet - cacher.getDocIdSet(reader); + cacher.getDocIdSet(context); // second time, nested filter should not be called filter.clear(); - cacher.getDocIdSet(reader); + cacher.getDocIdSet(context); assertFalse("second time", filter.wasCalled()); reader.close(); @@ -65,18 +66,19 @@ public class TestCachingWrapperFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir); writer.close(); - IndexReader reader = IndexReader.open(dir, true); + IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(dir, true)); + AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext(); final Filter filter = new Filter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { return null; } }; CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // the caching filter should return the empty set constant - assertSame(DocIdSet.EMPTY_DOCIDSET, 
cacher.getDocIdSet(reader)); + assertSame(DocIdSet.EMPTY_DOCIDSET, cacher.getDocIdSet(context)); reader.close(); dir.close(); @@ -87,11 +89,12 @@ public class TestCachingWrapperFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir); writer.close(); - IndexReader reader = IndexReader.open(dir, true); + IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(dir, true)); + AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext(); final Filter filter = new Filter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { return new DocIdSet() { @Override public DocIdSetIterator iterator() { @@ -103,16 +106,18 @@ public class TestCachingWrapperFilter extends LuceneTestCase { CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // the caching filter should return the empty set constant - assertSame(DocIdSet.EMPTY_DOCIDSET, cacher.getDocIdSet(reader)); + assertSame(DocIdSet.EMPTY_DOCIDSET, cacher.getDocIdSet(context)); reader.close(); dir.close(); } private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException { + assertTrue(reader.getTopReaderContext().isAtomic); + AtomicReaderContext context = (AtomicReaderContext) reader.getTopReaderContext(); final CachingWrapperFilter cacher = new CachingWrapperFilter(filter); - final DocIdSet originalSet = filter.getDocIdSet(reader); - final DocIdSet cachedSet = cacher.getDocIdSet(reader); + final DocIdSet originalSet = filter.getDocIdSet(context); + final DocIdSet cachedSet = cacher.getDocIdSet(context); assertTrue(cachedSet.isCacheable()); assertEquals(shouldCacheable, originalSet.isCacheable()); //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName()); @@ -140,7 +145,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { // a openbitset filter is always cacheable assertDocIdSetCacheable(reader, new Filter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { return new OpenBitSet(); } }, true); @@ -164,8 +169,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { // flipping a coin) may give us a newly opened reader, // but we use .reopen on this reader below and expect to // (must) get an NRT reader: - IndexReader reader = IndexReader.open(writer.w); - IndexSearcher searcher = new IndexSearcher(reader); + IndexReader reader = IndexReader.open(writer.w, true); + IndexSearcher searcher = newSearcher(reader); // add a doc, refresh the reader, and check that its there Document doc = new Document(); @@ -173,7 +178,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); assertEquals("Should find a hit...", 1, docs.totalHits); @@ -193,7 +199,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -208,7 +215,8 @@ public class TestCachingWrapperFilter extends 
LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); @@ -227,7 +235,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { // that had no change to deletions reader = refreshReader(reader); assertTrue(reader != oldReader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); int missCount = filter.missCount; docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -237,7 +246,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); missCount = filter.missCount; docs = searcher.search(new MatchAllDocsQuery(), filter, 1); @@ -252,7 +262,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.addDocument(doc); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); @@ -264,7 +275,8 @@ public class TestCachingWrapperFilter extends LuceneTestCase { writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); - searcher = new IndexSearcher(reader); + searcher.close(); + searcher = newSearcher(reader); docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); @@ -282,6 +294,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { // entry: assertTrue(oldReader != null); + searcher.close(); reader.close(); writer.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java b/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java index da1d75668de..0d3e274a022 100644 --- a/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java @@ -34,7 +34,13 @@ public class TestComplexExplanations extends TestExplanations { @Override public void setUp() throws Exception { super.setUp(); - searcher.setSimilarity(createQnorm1Similarity()); + searcher.setSimilarityProvider(createQnorm1Similarity()); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + super.tearDown(); } // must be static for weight serialization tests diff --git a/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 874fc7f3a14..4b9e12b16a7 100644 --- a/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -68,7 +69,7 @@ public class TestConstantScoreQuery extends LuceneTestCase { } @Override - public void setNextReader(IndexReader 
reader, int docBase) { + public void setNextReader(AtomicReaderContext context) { } @Override @@ -93,10 +94,10 @@ public class TestConstantScoreQuery extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); // set a similarity that does not normalize our boost away - searcher.setSimilarity(new DefaultSimilarity() { + searcher.setSimilarityProvider(new DefaultSimilarity() { @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; diff --git a/lucene/src/test/org/apache/lucene/search/TestDateFilter.java b/lucene/src/test/org/apache/lucene/search/TestDateFilter.java index 7a3ce2cf96b..37bb73f3406 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDateFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestDateFilter.java @@ -57,7 +57,7 @@ public class TestDateFilter extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // filter that should preserve matches // DateFilter df1 = DateFilter.Before("datefield", now); @@ -98,6 +98,7 @@ public class TestDateFilter extends LuceneTestCase { result = searcher.search(query2, df2, 1000).scoreDocs; assertEquals(0, result.length); + searcher.close(); reader.close(); indexStore.close(); } @@ -123,7 +124,7 @@ public class TestDateFilter extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // filter that should preserve matches // DateFilter df1 = DateFilter.After("datefield", now); @@ -165,6 +166,7 @@ public class TestDateFilter extends LuceneTestCase { result = searcher.search(query2, df2, 1000).scoreDocs; assertEquals(0, result.length); + searcher.close(); reader.close(); indexStore.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestDateSort.java b/lucene/src/test/org/apache/lucene/search/TestDateSort.java index 40627d0b5f9..5a4c1b3a1bf 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDateSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestDateSort.java @@ -28,7 +28,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -78,7 +77,7 @@ public class TestDateSort extends LuceneTestCase { } public void testReverseDateSort() throws Exception { - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true)); diff --git a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index b89b6897c8a..012e95eb98f 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -22,9 +22,12 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import 
org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; import java.text.DecimalFormat; @@ -59,8 +62,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { } @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1.0f; + public float computeNorm(String fieldName, FieldInvertState state) { + // Disable length norm + return state.getBoost(); } @Override @@ -69,7 +73,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { } } - public Similarity sim = new TestSimilarity(); + public SimilarityProvider sim = new TestSimilarity(); public Directory index; public IndexReader r; public IndexSearcher s; @@ -81,7 +85,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { index = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, index, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setSimilarity(sim)); + .setSimilarityProvider(sim).setMergePolicy(newInOrderLogMergePolicy())); // hed is the most important field, dek is secondary @@ -145,8 +149,8 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { r = new SlowMultiReaderWrapper(writer.getReader()); writer.close(); - s = new IndexSearcher(r); - s.setSimilarity(sim); + s = newSearcher(r); + s.setSimilarityProvider(sim); } @Override @@ -163,9 +167,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { dq.add(tq("dek", "DOES_NOT_EXIST")); QueryUtils.check(random, dq, s); - + assertTrue(s.getTopReaderContext().isAtomic); final Weight dw = dq.weight(s); - final Scorer ds = dw.scorer(s.getIndexReader(), true, false); + final Scorer ds = dw.scorer((AtomicReaderContext)s.getTopReaderContext(), ScorerContext.def()); final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS; if (skipOk) { fail("firsttime skipTo found a match? ... " @@ -177,11 +181,10 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f); dq.add(tq("dek", "albino")); dq.add(tq("dek", "DOES_NOT_EXIST")); - + assertTrue(s.getTopReaderContext().isAtomic); QueryUtils.check(random, dq, s); - final Weight dw = dq.weight(s); - final Scorer ds = dw.scorer(s.getIndexReader(), true, false); + final Scorer ds = dw.scorer((AtomicReaderContext)s.getTopReaderContext(), ScorerContext.def()); assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS); assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id")); diff --git a/lucene/src/test/org/apache/lucene/search/TestDocBoost.java b/lucene/src/test/org/apache/lucene/search/TestDocBoost.java index 2555896ac1d..8521724a0c5 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocBoost.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocBoost.java @@ -19,12 +19,14 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; /** Document boost unit test. 
* @@ -35,7 +37,7 @@ public class TestDocBoost extends LuceneTestCase { public void testDocBoost() throws Exception { Directory store = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, store); + RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Fieldable f1 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); Fieldable f2 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); @@ -63,7 +65,7 @@ public class TestDocBoost extends LuceneTestCase { final float[] scores = new float[4]; - new IndexSearcher(reader).search + newSearcher(reader).search (new TermQuery(new Term("field", "word")), new Collector() { private int base = 0; @@ -77,8 +79,8 @@ public class TestDocBoost extends LuceneTestCase { scores[doc + base] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { diff --git a/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java b/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java index 78f517c67c7..f4b015e52e1 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java @@ -28,6 +28,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -108,13 +109,13 @@ public class TestDocIdSet extends LuceneTestCase { writer.close(); // First verify the document is searchable. 
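
The TestDisjunctionMaxQuery, TestBoolean2 and TestComplexExplanations hunks reflect the Similarity/SimilarityProvider split: per-field scoring (tf, idf, computeNorm replacing lengthNorm) stays on Similarity, while queryNorm and coord move to the provider, and searchers and writer configs are wired up via setSimilarityProvider. A sketch of a similarity that disables length normalization, in the spirit of the TestSimilarity above; the class name is illustrative, and it relies on the fact (visible in these hunks) that DefaultSimilarity also acts as a SimilarityProvider in this snapshot of the API:

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.search.DefaultSimilarity;

    // DefaultSimilarity covers both halves of the split here, so one subclass can
    // override the per-field norm and the provider-level query norm.
    class NoLengthNormSimilarity extends DefaultSimilarity {
      @Override
      public float computeNorm(String field, FieldInvertState state) {
        return state.getBoost();         // ignore field length, keep the boost
      }

      @Override
      public float queryNorm(float sumOfSquaredWeights) {
        return 1.0f;                     // do not normalize the query
      }
    }

    // usage, assuming an IndexSearcher s as in the tests above:
    //   s.setSimilarityProvider(new NoLengthNormSimilarity());
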
- IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); Assert.assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits); // Now search w/ a Filter which returns a null DocIdSet Filter f = new Filter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { return null; } }; diff --git a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java index a99d2d0122b..cab656c48dc 100644 --- a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java +++ b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.*; import org.apache.lucene.util.LuceneTestCase; @@ -49,10 +50,10 @@ public class TestElevationComparator extends LuceneTestCase { writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"})); writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"})); - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); writer.close(); - IndexSearcher searcher = new IndexSearcher(r); + IndexSearcher searcher = newSearcher(r); runTest(searcher, true); runTest(searcher, false); @@ -177,8 +178,8 @@ class ElevationComparatorSource extends FieldComparatorSource { } @Override - public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - idIndex = FieldCache.DEFAULT.getTermsIndex(reader, fieldname); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + idIndex = FieldCache.DEFAULT.getTermsIndex(context.reader, fieldname); return this; } diff --git a/lucene/src/test/org/apache/lucene/search/TestExplanations.java b/lucene/src/test/org/apache/lucene/search/TestExplanations.java index 2960a4e943b..3f2712af511 100644 --- a/lucene/src/test/org/apache/lucene/search/TestExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestExplanations.java @@ -68,7 +68,7 @@ public class TestExplanations extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); @@ -77,7 +77,7 @@ public class TestExplanations extends LuceneTestCase { } reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } protected String[] docFields = { diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java index 59de60331ad..1bca291c661 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java @@ -41,7 
+41,7 @@ public class TestFieldCache extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); long theLong = Long.MAX_VALUE; double theDouble = Double.MAX_VALUE; byte theByte = Byte.MAX_VALUE; @@ -49,6 +49,10 @@ public class TestFieldCache extends LuceneTestCase { int theInt = Integer.MAX_VALUE; float theFloat = Float.MAX_VALUE; unicodeStrings = new String[NUM_DOCS]; + if (VERBOSE) { + System.out.println("TEST: setUp"); + } + writer.w.setInfoStream(VERBOSE ? System.out : null); for (int i = 0; i < NUM_DOCS; i++){ Document doc = new Document(); doc.add(newField("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); @@ -117,7 +121,7 @@ public class TestFieldCache extends LuceneTestCase { assertSame("Second request with explicit parser return same array", longs, cache.getLongs(reader, "theLong", FieldCache.DEFAULT_LONG_PARSER)); assertTrue("longs Size: " + longs.length + " is not: " + NUM_DOCS, longs.length == NUM_DOCS); for (int i = 0; i < longs.length; i++) { - assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i), longs[i] == (Long.MAX_VALUE - i)); + assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i) + " i=" + i, longs[i] == (Long.MAX_VALUE - i)); } @@ -210,7 +214,7 @@ public class TestFieldCache extends LuceneTestCase { public void testEmptyIndex() throws Exception { Directory dir = newDirectory(); IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(500)); - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); FieldCache.DocTerms terms = FieldCache.DEFAULT.getTerms(r, "foobar"); FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(r, "foobar"); r.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java index 5b77e632e43..db56940e110 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java @@ -44,7 +44,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { public void testRangeFilterId() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int medId = ((maxId - minId) / 2); @@ -122,14 +122,14 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,medIP,T,T), numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); - + search.close(); } @Test public void testFieldCacheRangeFilterRand() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); String minRP = pad(signedIndexDir.minR); String maxRP = pad(signedIndexDir.maxR); @@ -185,6 +185,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("max,max,T,T", 1, result.length); result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,null,T,F), numDocs).scoreDocs; assertEquals("max,nul,T,T", 
1, result.length); + search.close(); } // byte-ranges cannot be tested, because all ranges are too big for bytes, need an extra range for that @@ -193,7 +194,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { public void testFieldCacheRangeFilterShorts() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int numDocs = reader.numDocs(); int medId = ((maxId - minId) / 2); @@ -277,13 +278,14 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("overflow special case", 0, result.length); result = search.search(q,FieldCacheRangeFilter.newShortRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; assertEquals("inverse range", 0, result.length); + search.close(); } @Test public void testFieldCacheRangeFilterInts() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int numDocs = reader.numDocs(); int medId = ((maxId - minId) / 2); @@ -368,13 +370,14 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("overflow special case", 0, result.length); result = search.search(q,FieldCacheRangeFilter.newIntRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; assertEquals("inverse range", 0, result.length); + search.close(); } @Test public void testFieldCacheRangeFilterLongs() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int numDocs = reader.numDocs(); int medId = ((maxId - minId) / 2); @@ -459,6 +462,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("overflow special case", 0, result.length); result = search.search(q,FieldCacheRangeFilter.newLongRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; assertEquals("inverse range", 0, result.length); + search.close(); } // float and double tests are a bit minimalistic, but its complicated, because missing precision @@ -467,7 +471,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { public void testFieldCacheRangeFilterFloats() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int numDocs = reader.numDocs(); Float minIdO = Float.valueOf(minId + .5f); @@ -490,13 +494,14 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("infinity special case", 0, result.length); result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",null,Float.valueOf(Float.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs; assertEquals("infinity special case", 0, result.length); + search.close(); } @Test public void testFieldCacheRangeFilterDoubles() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int numDocs = reader.numDocs(); Double minIdO = Double.valueOf(minId + .5); @@ -519,6 +524,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { assertEquals("infinity special case", 0, result.length); result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",null, Double.valueOf(Double.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs; assertEquals("infinity special case", 0, result.length); + search.close(); } // test using a sparse index (with deleted docs). 
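
The filter-related hunks in this patch (MockFilter, SingleDocTestFilter, TestCachingWrapperFilter, and TestFilteredSearch below) all move Filter.getDocIdSet from an IndexReader argument to an AtomicReaderContext, so a filter now sees one segment at a time together with its docBase instead of resolving sub-readers itself. A minimal per-segment filter that accepts a fixed set of global doc ids (illustrative, not part of the patch):

    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.OpenBitSet;

    // Translates global doc ids to segment-local ids using the docBase of the
    // segment handed in via the context.
    class GlobalDocIdFilter extends Filter {
      private final int[] globalDocs;

      GlobalDocIdFilter(int[] globalDocs) {
        this.globalDocs = globalDocs;
      }

      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context) {
        final int docBase = context.docBase;
        final int maxDoc = context.reader.maxDoc();
        final OpenBitSet bits = new OpenBitSet(maxDoc);
        for (int globalDoc : globalDocs) {
          int localDoc = globalDoc - docBase;
          if (localDoc >= 0 && localDoc < maxDoc) {
            bits.set(localDoc);
          }
        }
        return bits;   // OpenBitSet is itself a cacheable DocIdSet
      }
    }
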
@@ -539,7 +545,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { writer.close(); IndexReader reader = IndexReader.open(dir, true); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); assertTrue(reader.hasDeletions()); ScoreDoc[] result; @@ -559,6 +565,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { result = search.search(q,FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) -10),T,T), 100).scoreDocs; assertEquals("find all", 11, result.length); + search.close(); reader.close(); dir.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java b/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java index 3178f637b46..f526f3ea3ea 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java @@ -47,7 +47,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase { IndexReader reader = w.getReader(); w.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); int numDocs = reader.numDocs(); ScoreDoc[] results; MatchAllDocsQuery q = new MatchAllDocsQuery(); @@ -68,6 +68,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase { results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs; assertEquals("Must match 2", 2, results.length); + searcher.close(); reader.close(); rd.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java index ef0bf485aff..eeb282e1dd8 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -17,16 +17,19 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.util.BitSet; + +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.DocIdBitSet; -import java.util.BitSet; +import org.apache.lucene.util.LuceneTestCase; /** * FilteredQuery JUnit tests. 
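
Several setUp hunks in this patch (TestExplanations and TestFieldCache above, TestFilteredQuery just below) stop relying on whatever merge policy the random config picks and pin a log merge policy so that document order stays predictable across merges. A sketch of that writer setup, assuming the newInOrderLogMergePolicy helper from LuceneTestCase used throughout the patch:

    Directory directory = newDirectory();
    // Pin an in-order log merge policy so merges keep docs in insertion order,
    // which the order-sensitive assertions in these tests depend on.
    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
            .setMergePolicy(newInOrderLogMergePolicy()));
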
@@ -48,7 +51,7 @@ public class TestFilteredQuery extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter (random, directory); + RandomIndexWriter writer = new RandomIndexWriter (random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new Document(); doc.add (newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); @@ -78,7 +81,7 @@ public class TestFilteredQuery extends LuceneTestCase { reader = writer.getReader(); writer.close (); - searcher = new IndexSearcher (reader); + searcher = newSearcher(reader); query = new TermQuery (new Term ("field", "three")); filter = newStaticFilterB(); } @@ -87,7 +90,7 @@ public class TestFilteredQuery extends LuceneTestCase { private static Filter newStaticFilterB() { return new Filter() { @Override - public DocIdSet getDocIdSet (IndexReader reader) { + public DocIdSet getDocIdSet (AtomicReaderContext context) { BitSet bitset = new BitSet(5); bitset.set (1); bitset.set (3); @@ -158,7 +161,7 @@ public class TestFilteredQuery extends LuceneTestCase { private static Filter newStaticFilterA() { return new Filter() { @Override - public DocIdSet getDocIdSet (IndexReader reader) { + public DocIdSet getDocIdSet (AtomicReaderContext context) { BitSet bitset = new BitSet(5); bitset.set(0, 5); return new DocIdBitSet(bitset); @@ -216,7 +219,7 @@ public class TestFilteredQuery extends LuceneTestCase { bq.add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.SHOULD); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(1, hits.length); - QueryUtils.check(random, query,searcher); + QueryUtils.check(random, query, searcher); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java index 951abdd246c..bada9039631 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -33,6 +33,7 @@ import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.OpenBitSet; + /** * */ @@ -45,21 +46,21 @@ public class TestFilteredSearch extends LuceneTestCase { Directory directory = newDirectory(); int[] filterBits = {1, 36}; SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); searchFiltered(writer, directory, filter, enforceSingleSegment); // run the test on more than one segment enforceSingleSegment = false; // reset - it is stateful filter.reset(); writer.close(); - writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new 
MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10).setMergePolicy(newInOrderLogMergePolicy())); // we index 60 docs - this will create 6 segments searchFiltered(writer, directory, filter, enforceSingleSegment); writer.close(); directory.close(); } - public void searchFiltered(IndexWriter writer, Directory directory, SimpleDocIdSetFilter filter, boolean optimize) { + public void searchFiltered(IndexWriter writer, Directory directory, Filter filter, boolean optimize) { try { for (int i = 0; i < 60; i++) {//Simple docs Document doc = new Document(); @@ -75,7 +76,6 @@ public class TestFilteredSearch extends LuceneTestCase { IndexSearcher indexSearcher = new IndexSearcher(directory, true); - filter.setTopReader(indexSearcher.getIndexReader()); ScoreDoc[] hits = indexSearcher.search(booleanQuery, filter, 1000).scoreDocs; assertEquals("Number of matched documents", 1, hits.length); indexSearcher.close(); @@ -89,20 +89,17 @@ public class TestFilteredSearch extends LuceneTestCase { public static final class SimpleDocIdSetFilter extends Filter { private final int[] docs; private int index; - private IndexReader topReader; + public SimpleDocIdSetFilter(int[] docs) { this.docs = docs; } - public void setTopReader(IndexReader r) { - topReader = r; - } - @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { + assert context.isAtomic; final OpenBitSet set = new OpenBitSet(); - int docBase = topReader.getSubReaderDocBase(reader); - final int limit = docBase+reader.maxDoc(); + int docBase = context.docBase; + final int limit = docBase+context.reader.maxDoc(); for (;index < docs.length; index++) { final int docId = docs[index]; if(docId > limit) diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java index 4fb415ffe45..c478a59f0d8 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java @@ -50,7 +50,7 @@ public class TestFuzzyQuery extends LuceneTestCase { addDoc("ddddd", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); writer.close(); FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0); @@ -198,7 +198,7 @@ public class TestFuzzyQuery extends LuceneTestCase { addDoc("segment", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); writer.close(); FuzzyQuery query; @@ -309,7 +309,7 @@ public class TestFuzzyQuery extends LuceneTestCase { IndexReader ir2 = writer2.getReader(); MultiReader mr = new MultiReader(ir1, ir2); - IndexSearcher searcher = new IndexSearcher(mr); + IndexSearcher searcher = newSearcher(mr); FuzzyQuery fq = new FuzzyQuery(new Term("field", "z123456"), 1f, 0, 2); TopDocs docs = searcher.search(fq, 2); assertEquals(5, docs.totalHits); // 5 docs, from the a and b's @@ -330,7 +330,7 @@ public class TestFuzzyQuery extends LuceneTestCase { addDoc("segment", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); writer.close(); Query query; @@ -368,7 +368,7 @@ 
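The TestFilteredQuery and TestFilteredSearch hunks show the new Filter contract: getDocIdSet is called once per segment with an AtomicReaderContext, so a filter that thinks in top-level doc ids has to re-base them itself through context.docBase and context.reader rather than asking the top reader for sub-reader offsets. A minimal sketch of a filter written against that contract (the class name is illustrative; the context fields docBase, reader and isAtomic are the ones this patch uses):

    package org.apache.lucene.search;

    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.util.OpenBitSet;

    public class GlobalDocIdFilter extends Filter {
      private final int[] globalDocs; // doc ids in top-level (composite reader) space

      public GlobalDocIdFilter(int[] globalDocs) {
        this.globalDocs = globalDocs;
      }

      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context) {
        assert context.isAtomic;
        final int docBase = context.docBase;
        final int maxDoc = context.reader.maxDoc();
        final OpenBitSet bits = new OpenBitSet(maxDoc);
        for (int globalDoc : globalDocs) {
          // keep only the ids that fall inside this segment, shifted into segment space
          if (globalDoc >= docBase && globalDoc < docBase + maxDoc) {
            bits.set(globalDoc - docBase);
          }
        }
        return bits;
      }
    }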
public class TestFuzzyQuery extends LuceneTestCase { addDoc("Lucenne", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); writer.close(); FuzzyQuery query = new FuzzyQuery(new Term("field", "lucene")); @@ -413,7 +413,7 @@ public class TestFuzzyQuery extends LuceneTestCase { Query q = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer).parse( "giga~0.9" ); // 3. search - IndexSearcher searcher = new IndexSearcher(r); + IndexSearcher searcher = newSearcher(r); ScoreDoc[] hits = searcher.search(q, 10).scoreDocs; assertEquals(1, hits.length); assertEquals("Giga byte", searcher.doc(hits[0].doc).get("field")); @@ -435,7 +435,7 @@ public class TestFuzzyQuery extends LuceneTestCase { addDoc("test", w); addDoc("working", w); IndexReader reader = w.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); w.close(); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java index 314089b6ee9..262386a43b6 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java @@ -79,7 +79,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase { int terms = (int) Math.pow(2, bits); Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false)); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new Document(); Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); @@ -91,7 +91,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase { } IndexReader r = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(r); + IndexSearcher searcher = newSearcher(r); writer.close(); String line; while ((line = reader.readLine()) != null) { diff --git a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java index 79e6f8b33e4..8d96c0feb2e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java @@ -40,14 +40,14 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { public void testQuery() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(2)); + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(2).setMergePolicy(newInOrderLogMergePolicy())); addDoc("one", iw, 1f); addDoc("two", iw, 20f); addDoc("three four", iw, 300f); iw.close(); IndexReader ir = IndexReader.open(dir, false); - IndexSearcher is = new IndexSearcher(ir); + IndexSearcher is = newSearcher(ir); ScoreDoc[] hits; // assert with norms scoring turned off @@ -69,7 +69,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { assertEquals("one", ir.document(hits[2].doc).get("key")); // change norm & retest - ir.setNorm(0, "key", 400f); + ir.setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f)); normsQuery = new MatchAllDocsQuery("key"); hits = is.search(normsQuery, 
null, 1000).scoreDocs; assertEquals(3, hits.length); @@ -93,7 +93,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { assertEquals(1, hits.length); // delete a document: - is.getIndexReader().deleteDocument(0); + ir.deleteDocument(0); hits = is.search(new MatchAllDocsQuery(), null, 1000).scoreDocs; assertEquals(2, hits.length); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java index 43e3d5ad713..ef860f9737a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.Explanation.IDFExplanation; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.document.Document; @@ -30,6 +31,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; +import java.util.Collection; import java.util.LinkedList; /** @@ -51,7 +53,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { add("piccadilly circus", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // search for "blueberry pi*": MultiPhraseQuery query1 = new MultiPhraseQuery(); @@ -140,12 +142,13 @@ public class TestMultiPhraseQuery extends LuceneTestCase { IndexReader r = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(r); + IndexSearcher searcher = newSearcher(r); MultiPhraseQuery q = new MultiPhraseQuery(); q.add(new Term("body", "blueberry")); q.add(new Term("body", "chocolate")); q.add(new Term[] {new Term("body", "pie"), new Term("body", "tart")}); assertEquals(2, searcher.search(q, 1).totalHits); + searcher.close(); r.close(); indexStore.close(); } @@ -169,7 +172,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { add("blue raspberry pie", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // This query will be equivalent to +body:pie +body:"blue*" BooleanQuery q = new BooleanQuery(); q.add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST); @@ -200,7 +203,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { add("a note", "note", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // This query will be equivalent to +type:note +body:"a t*" BooleanQuery q = new BooleanQuery(); @@ -227,7 +230,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { add("a note", "note", writer); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); MultiPhraseQuery q = new MultiPhraseQuery(); q.add(new Term("body", "a")); @@ -285,4 +288,44 @@ public class TestMultiPhraseQuery extends LuceneTestCase { new MultiPhraseQuery().toString(); } + public void testCustomIDF() throws Exception { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("This is a test", "object", writer); + add("a note", "note", 
writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarityProvider(new DefaultSimilarity() { + + @Override + public IDFExplanation idfExplain(Collection terms, + IndexSearcher searcher) throws IOException { + return new IDFExplanation() { + + @Override + public float getIdf() { + return 10f; + } + + @Override + public String explain() { + return "just a test"; + } + + }; + } + }); + + MultiPhraseQuery query = new MultiPhraseQuery(); + query.add(new Term[] { new Term("body", "this"), new Term("body", "that") }); + query.add(new Term("body", "is")); + Weight weight = query.createWeight(searcher); + assertEquals(10f * 10f, weight.sumOfSquaredWeights(), 0.001f); + + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java index 7fc8ea6a799..5b52755f3fe 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -150,7 +151,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { public void testEqualScores() throws IOException { // NOTE: uses index build in *this* setUp - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); ScoreDoc[] result; @@ -173,13 +174,14 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { result[i].score); } + search.close(); } @Test public void testBoost() throws IOException { // NOTE: uses index build in *this* setUp - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); // test for correct application of query normalization // must use a non score normalizing method for this. 
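The testCustomIDF hunk above pins down how the searcher-level similarity now feeds query weighting: with idfExplain overridden to return a constant idf of 10 and the default boost of 1, the raw query weight of the multi-phrase query is idf x boost = 10, so Weight.sumOfSquaredWeights() comes out as 10 x 10 = 100, which is exactly what the assert expects; queryNorm plays no part at this stage, it is derived from that sum afterwards and applied during normalization.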
@@ -197,8 +199,8 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, scorer.score()); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { @@ -245,13 +247,14 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { Assert.assertEquals(0, hits[0].doc); Assert.assertEquals(1, hits[1].doc); assertTrue(hits[0].score > hits[1].score); + search.close(); } @Test public void testBooleanOrderUnAffected() throws IOException { // NOTE: uses index build in *this* setUp - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); // first do a regular TermRangeQuery which uses term expansion so // docs with more terms in range get higher scores @@ -276,6 +279,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { actual[i].doc); } + search.close(); } @Test @@ -283,7 +287,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { // NOTE: uses index build in *super* setUp IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int medId = ((maxId - minId) / 2); @@ -404,6 +408,8 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); + + search.close(); } @Test @@ -411,7 +417,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { // NOTE: uses index build in *super* setUp IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int medId = ((maxId - minId) / 2); @@ -488,6 +494,8 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; assertEquals("med,med,T,T,c", 1, result.length); + + search.close(); } @Test @@ -495,7 +503,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { // NOTE: uses index build in *super* setUp IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); String minRP = pad(signedIndexDir.minR); String maxRP = pad(signedIndexDir.maxR); @@ -551,6 +559,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); + search.close(); } @Test @@ -559,7 +568,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { // using the unsigned index because collation seems to ignore hyphens IndexReader reader = unsignedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); String minRP = pad(unsignedIndexDir.minR); String maxRP = pad(unsignedIndexDir.maxR); @@ -616,6 +625,8 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { assertEquals("max,max,T,T,c", 1, result.length); result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; assertEquals("max,nul,T,T,c", 1, result.length); + + search.close(); } @Test @@ -635,7 
+646,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi @@ -680,7 +691,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); Collator c = Collator.getInstance(new Locale("da", "dk")); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java index 5e7f3f5597b..eb02ac03e3c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java @@ -61,17 +61,17 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase { writer.close(); swriter1.close(); swriter2.close(); reader = IndexReader.open(dir, true); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); multiReader = new MultiReader(new IndexReader[] { IndexReader.open(sdir1, true), IndexReader.open(sdir2, true) }, true); - multiSearcher = new IndexSearcher(multiReader); + multiSearcher = newSearcher(multiReader); multiReaderDupls = new MultiReader(new IndexReader[] { IndexReader.open(sdir1, true), IndexReader.open(dir, true) }, true); - multiSearcherDupls = new IndexSearcher(multiReaderDupls); + multiSearcherDupls = newSearcher(multiReaderDupls); } @AfterClass diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java index 8c8cc1a76c3..7657d25dc78 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java @@ -38,7 +38,7 @@ public class TestMultiThreadTermVectors extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); //writer.setUseCompoundFile(false); //writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java index df996e604e3..2d0e1a4522c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java @@ -59,7 +59,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher=new IndexSearcher(reader); + IndexSearcher searcher=newSearcher(reader); num = 50 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { int lower=random.nextInt(Integer.MAX_VALUE); diff --git a/lucene/src/test/org/apache/lucene/search/TestNot.java b/lucene/src/test/org/apache/lucene/search/TestNot.java index 20f2d8f61a0..d86f13f8a31 100644 
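Several of the anonymous collectors in this patch (TestMultiTermConstantScore above, TestScorerPerf, TestSetNorm, TestSimilarity and TestSubScorerFreqs below) change the same way: setNextReader no longer receives an (IndexReader, int docBase) pair but a single AtomicReaderContext, and the collector reads the segment's doc-id offset from context.docBase. A minimal sketch of a collector under the new signature, using only the Collector callbacks visible in this patch (the class name is illustrative):

    package org.apache.lucene.search;

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.index.IndexReader.AtomicReaderContext;

    public class GlobalDocIdCollector extends Collector {
      private final List<Integer> hits = new ArrayList<Integer>();
      private int docBase;

      @Override
      public void setScorer(Scorer scorer) {
        // scores are not needed for this sketch
      }

      @Override
      public void collect(int doc) throws IOException {
        // doc is segment-relative; re-base it into top-level reader space
        hits.add(docBase + doc);
      }

      @Override
      public void setNextReader(AtomicReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }

      public List<Integer> getHits() {
        return hits;
      }
    }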
--- a/lucene/src/test/org/apache/lucene/search/TestNot.java +++ b/lucene/src/test/org/apache/lucene/search/TestNot.java @@ -44,7 +44,7 @@ public class TestNot extends LuceneTestCase { writer.addDocument(d1); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); Query query = parser.parse("a NOT b"); //System.out.println(query); diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java index 699e4c20f8c..b53c483dd63 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; @@ -54,7 +55,8 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)) + .setMergePolicy(newInOrderLogMergePolicy())); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), @@ -87,7 +89,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { } reader = writer.getReader(); - searcher=new IndexSearcher(reader); + searcher=newSearcher(reader); writer.close(); } @@ -176,14 +178,15 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { @Test public void testInverseRange() throws Exception { + AtomicReaderContext context = (AtomicReaderContext) new SlowMultiReaderWrapper(searcher.getIndexReader()).getTopReaderContext(); NumericRangeFilter f = NumericRangeFilter.newIntRange("field8", 8, 1000, -1000, true, true); - assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); f = NumericRangeFilter.newIntRange("field8", 8, Integer.MAX_VALUE, null, false, false); assertSame("A exclusive range starting with Integer.MAX_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); f = NumericRangeFilter.newIntRange("field8", 8, null, Integer.MIN_VALUE, false, false); assertSame("A exclusive range ending with Integer.MIN_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); } @Test diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java index 63a3409a8c6..321f698dfee 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java 
+++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -51,7 +52,8 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)) + .setMergePolicy(newInOrderLogMergePolicy())); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), @@ -87,7 +89,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { writer.addDocument(doc); } reader = writer.getReader(); - searcher=new IndexSearcher(reader); + searcher=newSearcher(reader); writer.close(); } @@ -181,14 +183,16 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { @Test public void testInverseRange() throws Exception { + AtomicReaderContext context = (AtomicReaderContext) new SlowMultiReaderWrapper(searcher.getIndexReader()).getTopReaderContext(); NumericRangeFilter f = NumericRangeFilter.newLongRange("field8", 8, 1000L, -1000L, true, true); - assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, + f.getDocIdSet(context)); f = NumericRangeFilter.newLongRange("field8", 8, Long.MAX_VALUE, null, false, false); assertSame("A exclusive range starting with Long.MAX_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); f = NumericRangeFilter.newLongRange("field8", 8, null, Long.MIN_VALUE, false, false); assertSame("A exclusive range ending with Long.MIN_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(new SlowMultiReaderWrapper(searcher.getIndexReader()))); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); } @Test diff --git a/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java index 03985b1e879..b3d0128902d 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java @@ -65,7 +65,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // PhrasePrefixQuery query1 = new PhrasePrefixQuery(); MultiPhraseQuery query1 = new MultiPhraseQuery(); diff --git a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java index a9e9c78a320..35349c696ab 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -86,7 +86,7 @@ public 
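The testInverseRange hunks above call the filter directly, so they now have to manufacture an AtomicReaderContext first; the patch does this by wrapping the searcher's (possibly composite) reader in SlowMultiReaderWrapper and casting its top-level context. A condensed sketch of that call pattern, assuming the same classes the hunks use and an open IndexSearcher named searcher inside a LuceneTestCase subclass:

    // obtain an atomic (single-segment view) context for direct filter calls
    AtomicReaderContext context = (AtomicReaderContext)
        new SlowMultiReaderWrapper(searcher.getIndexReader()).getTopReaderContext();

    NumericRangeFilter<Integer> f =
        NumericRangeFilter.newIntRange("field8", 8, 1000, -1000, true, true);

    // an inverted range can never match, so the filter short-circuits to the shared empty set
    assertSame(DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context));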
class TestPhraseQuery extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); query = new PhraseQuery(); } @@ -221,7 +221,7 @@ public class TestPhraseQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // valid exact phrase query PhraseQuery query = new PhraseQuery(); @@ -262,7 +262,7 @@ public class TestPhraseQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.add(new Term("source", "marketing")); @@ -301,7 +301,7 @@ public class TestPhraseQuery extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); termQuery = new TermQuery(new Term("contents","woo")); phraseQuery = new PhraseQuery(); @@ -335,7 +335,7 @@ public class TestPhraseQuery extends LuceneTestCase { public void testSlopScoring() throws IOException { Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new Document(); doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED)); @@ -352,7 +352,7 @@ public class TestPhraseQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); PhraseQuery query = new PhraseQuery(); query.add(new Term("field", "firstname")); query.add(new Term("field", "lastname")); @@ -598,7 +598,7 @@ public class TestPhraseQuery extends LuceneTestCase { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(); - RandomIndexWriter w = new RandomIndexWriter(random, dir, analyzer); + RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newInOrderLogMergePolicy())); List> docs = new ArrayList>(); Document d = new Document(); Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED); @@ -649,7 +649,7 @@ public class TestPhraseQuery extends LuceneTestCase { } IndexReader reader = w.getReader(); - IndexSearcher s = new IndexSearcher(reader); + IndexSearcher s = newSearcher(reader); w.close(); // now search diff --git a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java index a57d7c0c222..3007d8a102e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.Directory; import org.apache.lucene.search.payloads.PayloadSpanUtil; +import org.apache.lucene.search.spans.MultiSpansWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -95,7 +96,7 @@ public class TestPositionIncrement extends 
LuceneTestCase { writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), MultiFields.getDeletedDocs(searcher.getIndexReader()), @@ -243,14 +244,15 @@ public class TestPositionIncrement extends LuceneTestCase { "a a b c d e a f g h i j a b k k"))); writer.addDocument(doc); - IndexReader r = new SlowMultiReaderWrapper(writer.getReader()); + final IndexReader readerFromWriter = writer.getReader(); + SlowMultiReaderWrapper r = new SlowMultiReaderWrapper(readerFromWriter); DocsAndPositionsEnum tp = r.termPositionsEnum(r.getDeletedDocs(), "content", new BytesRef("a")); int count = 0; - assertTrue(tp.nextDoc() != tp.NO_MORE_DOCS); + assertTrue(tp.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS); // "a" occurs 4 times assertEquals(4, tp.freq()); int expected = 0; @@ -260,9 +262,9 @@ public class TestPositionIncrement extends LuceneTestCase { assertEquals(6, tp.nextPosition()); // only one doc has "a" - assertEquals(tp.NO_MORE_DOCS, tp.nextDoc()); + assertEquals(DocsAndPositionsEnum.NO_MORE_DOCS, tp.nextDoc()); - IndexSearcher is = new IndexSearcher(r); + IndexSearcher is = newSearcher(readerFromWriter); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); @@ -274,7 +276,7 @@ public class TestPositionIncrement extends LuceneTestCase { if (VERBOSE) { System.out.println("\ngetPayloadSpans test"); } - Spans pspans = snq.getSpans(is.getIndexReader()); + Spans pspans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); while (pspans.next()) { if (VERBOSE) { System.out.println("doc " + pspans.doc() + ": span " + pspans.start() @@ -289,11 +291,11 @@ public class TestPositionIncrement extends LuceneTestCase { } } } - assertEquals(5, count); assertTrue(sawZero); + assertEquals(5, count); // System.out.println("\ngetSpans test"); - Spans spans = snq.getSpans(is.getIndexReader()); + Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); count = 0; sawZero = false; while (spans.next()) { @@ -308,7 +310,7 @@ public class TestPositionIncrement extends LuceneTestCase { // System.out.println("\nPayloadSpanUtil test"); sawZero = false; - PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader()); + PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext()); Collection pls = psu.getPayloadsForQuery(snq); count = pls.size(); for (byte[] bytes : pls) { diff --git a/lucene/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 28045441dce..4ef962c0518 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -19,6 +19,10 @@ package org.apache.lucene.search; import java.io.IOException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; public class TestPositiveScoresOnlyCollector extends LuceneTestCase { @@ -26,8 +30,8 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { private static final class SimpleScorer extends Scorer { private int idx = -1; - public SimpleScorer() { - super(null); + public SimpleScorer(Weight weight) { + super(weight); } @Override 
public float score() throws IOException { @@ -65,7 +69,14 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { } } - Scorer s = new SimpleScorer(); + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + writer.commit(); + IndexReader ir = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(ir); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher); + Scorer s = new SimpleScorer(fake); TopDocsCollector tdc = TopScoreDocCollector.create(scores.length, true); Collector c = new PositiveScoresOnlyCollector(tdc); c.setScorer(s); @@ -78,6 +89,9 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { for (int i = 0; i < sd.length; i++) { assertTrue("only positive scores should return: " + sd[i].score, sd[i].score > 0); } + searcher.close(); + ir.close(); + directory.close(); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java b/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java index e1fffc98e22..890ffe180e8 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java @@ -48,7 +48,7 @@ public class TestPrefixFilter extends LuceneTestCase { // PrefixFilter combined with ConstantScoreQuery PrefixFilter filter = new PrefixFilter(new Term("category", "/Computers")); Query query = new ConstantScoreQuery(filter); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(4, hits.length); diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java b/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java index ddafd662857..1aab6b5be13 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java @@ -75,7 +75,7 @@ public class TestPrefixInBooleanQuery extends LuceneTestCase { } reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java b/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java index 0a521e1c124..5ab57283ec2 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java @@ -47,7 +47,7 @@ public class TestPrefixQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("All documents in /Computers category and below", 3, hits.length); diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java index cb63593b252..46c1a75aab2 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java @@ -65,7 +65,7 @@ public class TestPrefixRandom extends LuceneTestCase { writer.addDocument(doc); } reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } diff --git 
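TestPositiveScoresOnlyCollector above and TestScoreCachingWrappingScorer below share another mechanical change: the Scorer constructor now takes a Weight instead of null, so each test builds a throw-away weight from a trivial TermQuery against a fresh index. A condensed sketch of that setup, assuming the helpers the patch itself uses (the "fake"/"weight" term is the placeholder the patch uses, and SimpleScorer is the test's own stub Scorer):

    // inside a LuceneTestCase subclass
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
    writer.commit();                        // ensure an (even empty) index exists
    IndexReader ir = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(ir);
    // any query works; the weight only has to exist so the stub Scorer has a parent
    Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher);
    Scorer s = new SimpleScorer(fake);

    // ... exercise the collector under test with s ...

    searcher.close();
    ir.close();
    directory.close();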
a/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java index a41e33fbf7f..37d522c3c05 100644 --- a/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java @@ -43,7 +43,7 @@ public class TestQueryWrapperFilter extends LuceneTestCase { // should not throw exception with primitive query QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); TopDocs hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); assertEquals(1, hits.totalHits); hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java b/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java index 254246bf210..75036679595 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java @@ -42,6 +42,7 @@ public class TestRegexpQuery extends LuceneTestCase { private Directory directory; private final String FN = "field"; + @Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); @@ -53,9 +54,10 @@ public class TestRegexpQuery extends LuceneTestCase { writer.addDocument(doc); reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } + @Override public void tearDown() throws Exception { searcher.close(); reader.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java index d50a02400d9..7180d3b82ec 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java @@ -62,7 +62,7 @@ public class TestRegexpRandom extends LuceneTestCase { reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } private char N() { diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java index a6627521831..143d977cfb7 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java @@ -82,7 +82,7 @@ public class TestRegexpRandom2 extends LuceneTestCase { } reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index 69ae819c73d..664f1810805 100644 --- a/lucene/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -20,6 +20,10 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; public class TestScoreCachingWrappingScorer extends LuceneTestCase { @@ -28,8 +32,8 @@ public class 
TestScoreCachingWrappingScorer extends LuceneTestCase { private int idx = 0; private int doc = -1; - public SimpleScorer() { - super(null); + public SimpleScorer(Weight weight) { + super(weight); } @Override public float score() throws IOException { @@ -76,7 +80,7 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { ++idx; } - @Override public void setNextReader(IndexReader reader, int docBase) + @Override public void setNextReader(AtomicReaderContext context) throws IOException { } @@ -95,8 +99,14 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f }; public void testGetScores() throws Exception { - - Scorer s = new SimpleScorer(); + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + writer.commit(); + IndexReader ir = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(ir); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher); + Scorer s = new SimpleScorer(fake); ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); scc.setScorer(s); @@ -109,7 +119,9 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { for (int i = 0; i < scores.length; i++) { assertEquals(scores[i], scc.mscores[i], 0f); } - + searcher.close(); + ir.close(); + directory.close(); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java index 7aba01f55ee..1fc436f8b3b 100755 --- a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -6,7 +6,7 @@ import org.apache.lucene.util.LuceneTestCase; import java.util.BitSet; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -109,8 +109,8 @@ public class TestScorerPerf extends LuceneTestCase { public int getSum() { return sum; } @Override - public void setNextReader(IndexReader reader, int base) { - docBase = base; + public void setNextReader(AtomicReaderContext context) { + docBase = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { @@ -141,7 +141,7 @@ public class TestScorerPerf extends LuceneTestCase { final BitSet rnd = sets[random.nextInt(sets.length)]; Query q = new ConstantScoreQuery(new Filter() { @Override - public DocIdSet getDocIdSet(IndexReader reader) { + public DocIdSet getDocIdSet(AtomicReaderContext context) { return new DocIdBitSet(rnd); } }); diff --git a/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java b/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java index 0adba6edf49..8e0456bc3ff 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java +++ b/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java @@ -63,7 +63,7 @@ public class TestSearchWithThreads extends LuceneTestCase { final long endTime = System.currentTimeMillis(); if (VERBOSE) System.out.println("BUILD took " + (endTime-startTime)); - final IndexSearcher s = new IndexSearcher(r); + final IndexSearcher s = newSearcher(r); final AtomicBoolean failed = new AtomicBoolean(); final AtomicLong netSearch = new AtomicLong(); diff --git a/lucene/src/test/org/apache/lucene/search/TestSetNorm.java 
b/lucene/src/test/org/apache/lucene/search/TestSetNorm.java index a117ee30c6b..7c873bacae4 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSetNorm.java +++ b/lucene/src/test/org/apache/lucene/search/TestSetNorm.java @@ -23,6 +23,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -50,10 +51,11 @@ public class TestSetNorm extends LuceneTestCase { // reset the boost of each instance of this document IndexReader reader = IndexReader.open(store, false); - reader.setNorm(0, "field", 1.0f); - reader.setNorm(1, "field", 2.0f); - reader.setNorm(2, "field", 4.0f); - reader.setNorm(3, "field", 16.0f); + Similarity similarity = new DefaultSimilarity().get("field"); + reader.setNorm(0, "field", similarity.encodeNormValue(1.0f)); + reader.setNorm(1, "field", similarity.encodeNormValue(2.0f)); + reader.setNorm(2, "field", similarity.encodeNormValue(4.0f)); + reader.setNorm(3, "field", similarity.encodeNormValue(16.0f)); reader.close(); // check that searches are ordered by this boost @@ -74,8 +76,8 @@ public class TestSetNorm extends LuceneTestCase { scores[doc + base] = scorer.score(); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { diff --git a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java index 9518a025ed9..d788799db9d 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSimilarity.java +++ b/lucene/src/test/org/apache/lucene/search/TestSimilarity.java @@ -21,7 +21,9 @@ import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; import java.util.Collection; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -37,13 +39,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation; */ public class TestSimilarity extends LuceneTestCase { - public static class SimpleSimilarity extends Similarity { - @Override public float lengthNorm(String field, int numTerms) { return 1.0f; } - @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public static class SimpleSimilarity extends Similarity implements SimilarityProvider { + @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } - @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } @Override public IDFExplanation idfExplain(Collection terms, IndexSearcher searcher) throws IOException { return new IDFExplanation() { @Override @@ -56,13 +56,18 @@ public class TestSimilarity extends LuceneTestCase { } }; } + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + public float coord(int overlap, int maxOverlap) { return 1.0f; } + 
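The TestSetNorm hunk here (like the TestMatchAllDocsQuery hunk earlier) shows the other half of the norms change: IndexReader.setNorm no longer accepts a raw float, the caller first encodes the boost through the field's Similarity. A minimal sketch, assuming DefaultSimilarity acts as its own per-field provider exactly as the patch uses it:

    // old: reader.setNorm(0, "field", 2.0f);
    Similarity fieldSim = new DefaultSimilarity().get("field");
    reader.setNorm(0, "field", fieldSim.encodeNormValue(2.0f));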
public Similarity get(String field) { + return this; + } } public void testSimilarity() throws Exception { Directory store = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setSimilarity(new SimpleSimilarity())); + .setSimilarityProvider(new SimpleSimilarity())); Document d1 = new Document(); d1.add(newField("field", "a c", Field.Store.YES, Field.Index.ANALYZED)); @@ -75,8 +80,8 @@ public class TestSimilarity extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); - searcher.setSimilarity(new SimpleSimilarity()); + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarityProvider(new SimpleSimilarity()); Term a = new Term("field", "a"); Term b = new Term("field", "b"); @@ -93,7 +98,7 @@ public class TestSimilarity extends LuceneTestCase { assertEquals(1.0f, scorer.score()); } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; @@ -117,8 +122,8 @@ public class TestSimilarity extends LuceneTestCase { assertEquals((float)doc+base+1, scorer.score()); } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { @@ -143,7 +148,7 @@ public class TestSimilarity extends LuceneTestCase { assertEquals(1.0f, scorer.score()); } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; @@ -164,7 +169,7 @@ public class TestSimilarity extends LuceneTestCase { assertEquals(2.0f, scorer.score()); } @Override - public void setNextReader(IndexReader reader, int docBase) {} + public void setNextReader(AtomicReaderContext context) {} @Override public boolean acceptsDocsOutOfOrder() { return true; diff --git a/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java b/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java new file mode 100644 index 00000000000..7a8f123be71 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java @@ -0,0 +1,151 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiNorms; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSimilarityProvider extends LuceneTestCase { + private Directory directory; + private IndexReader reader; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + SimilarityProvider sim = new ExampleSimilarityProvider(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer()).setSimilarityProvider(sim); + RandomIndexWriter iw = new RandomIndexWriter(random, directory, iwc); + Document doc = new Document(); + Field field = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field); + Field field2 = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field2); + + field.setValue("quick brown fox"); + field2.setValue("quick brown fox"); + iw.addDocument(doc); + field.setValue("jumps over lazy brown dog"); + field2.setValue("jumps over lazy brown dog"); + iw.addDocument(doc); + reader = iw.getReader(); + iw.close(); + searcher = newSearcher(reader); + searcher.setSimilarityProvider(sim); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testBasics() throws Exception { + // sanity check of norms writer + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + byte barNorms[] = MultiNorms.norms(reader, "bar"); + for (int i = 0; i < fooNorms.length; i++) { + assertFalse(fooNorms[i] == barNorms[i]); + } + + // sanity check of searching + TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10); + assertTrue(foodocs.totalHits > 0); + TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10); + assertTrue(bardocs.totalHits > 0); + assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score); + } + + private class ExampleSimilarityProvider implements SimilarityProvider { + private Similarity sim1 = new Sim1(); + private Similarity sim2 = new Sim2(); + + public float coord(int overlap, int maxOverlap) { + return 1f; + } + + public float queryNorm(float sumOfSquaredWeights) { + return 1f; + } + + public Similarity get(String field) { + if (field.equals("foo")) { + return sim1; + } else { + return sim2; + } + } + } + + private class Sim1 extends Similarity { + @Override + public float computeNorm(String field, FieldInvertState state) { + return 1f; + } + + @Override + public float sloppyFreq(int distance) { + return 1f; + } + + @Override + public float tf(float freq) { + return 1f; + } + + @Override + public float idf(int docFreq, int numDocs) { + return 1f; + } + } + + private class Sim2 extends Similarity { + @Override + public float computeNorm(String field, FieldInvertState state) { + return 10f; + } + + @Override + public float sloppyFreq(int distance) { + return 10f; + } + + @Override + public float tf(float freq) { + return 10f; + } + + @Override + public float idf(int docFreq, int numDocs) { + return 10f; + } + } +} diff --git 
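The new TestSimilarityProvider file that ends here is the clearest picture of the Similarity/SimilarityProvider split: per-field factors (computeNorm, tf, idf, sloppyFreq) stay on Similarity, query-level factors (coord, queryNorm) and the per-field lookup move to the provider, and the same provider is installed on both the writer config and the searcher. A condensed sketch of that wiring, using only calls that appear in the test's setUp (directory and reader are the test's own fields):

    SimilarityProvider sim = new ExampleSimilarityProvider();   // the per-field provider defined in the test
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
        .setSimilarityProvider(sim);                            // norms are computed per field at index time
    RandomIndexWriter iw = new RandomIndexWriter(random, directory, iwc);
    // ... add documents, obtain reader ...
    IndexSearcher searcher = newSearcher(reader);
    searcher.setSimilarityProvider(sim);                        // the same provider drives scoring at search time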
a/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java index 0deefbc62d0..f10c5d41e9b 100755 --- a/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java @@ -121,7 +121,7 @@ public class TestSloppyPhraseQuery extends LuceneTestCase { IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); TopDocs td = searcher.search(query,null,10); //System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore()); assertEquals("slop: "+slop+" query: "+query+" doc: "+doc+" Wrong number of hits", expectedNumResults, td.totalHits); diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index a4b22d654e0..39d2f170b63 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -34,6 +34,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; @@ -120,7 +121,7 @@ public class TestSort extends LuceneTestCase implements Serializable { throws IOException { Directory indexStore = newDirectory(); dirs.add(indexStore); - RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i=0; i spans = result.getPositions(); assertTrue("spans is null and it shouldn't be", spans != null); int size = getDocIdSetSize(docIdSet); @@ -60,6 +65,7 @@ public class TestSpanQueryFilter extends LuceneTestCase { //There should be two positions in each assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2); } + reader.close(); dir.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java b/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java index 4c2b05cb8fa..985cb8010e7 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java +++ b/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java @@ -17,19 +17,20 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import java.io.*; +import java.util.*; + +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; import org.apache.lucene.index.*; -import org.apache.lucene.util.*; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.Scorer.ScorerVisitor; import org.apache.lucene.store.*; - -import java.util.*; -import java.io.*; - -import org.junit.Test; +import org.apache.lucene.util.*; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Test; public class TestSubScorerFreqs extends LuceneTestCase { @@ -40,7 +41,7 @@ public class TestSubScorerFreqs extends LuceneTestCase { public static void makeIndex() throws Exception { dir = new RAMDirectory(); RandomIndexWriter w = new RandomIndexWriter( - random, dir); + random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); // make sure we have more than one segment occationally for (int i = 0; i < 31 * RANDOM_MULTIPLIER; i++) { Document doc = new Document(); @@ -53,7 +54,7 @@ public class TestSubScorerFreqs extends LuceneTestCase { w.addDocument(doc); } - s = new IndexSearcher(w.getReader()); + s = newSearcher(w.getReader()); w.close(); } @@ -126,10 +127,10 @@ public class TestSubScorerFreqs extends LuceneTestCase { } @Override - public void setNextReader(IndexReader reader, int docBase) + public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = docBase; - other.setNextReader(reader, docBase); + docBase = context.docBase; + other.setNextReader(context); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java b/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java index 1bcd546fd49..03b2b06f7a5 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java @@ -44,7 +44,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { public void testRangeFilterId() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); int medId = ((maxId - minId) / 2); @@ -141,13 +141,14 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); + search.close(); } @Test public void testRangeFilterIdCollating() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); Collator c = Collator.getInstance(Locale.ENGLISH); @@ -243,13 +244,15 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { numHits = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; assertEquals("med,med,T,T", 1, numHits); + + search.close(); } @Test public void testRangeFilterRand() throws IOException { IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); String minRP = pad(signedIndexDir.minR); String maxRP = pad(signedIndexDir.maxR); @@ -320,6 +323,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); + search.close(); } @Test @@ -327,7 +331,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { // using the unsigned index 
because collation seems to ignore hyphens IndexReader reader = unsignedIndexReader; - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); Collator c = Collator.getInstance(Locale.ENGLISH); @@ -398,6 +402,8 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { numHits = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; assertEquals("max,nul,T,T", 1, numHits); + + search.close(); } @Test @@ -417,7 +423,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); Query q = new TermQuery(new Term("body", "body")); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in @@ -461,7 +467,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter { IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); Query q = new TermQuery(new Term("body", "body")); Collator collator = Collator.getInstance(new Locale("da", "dk")); diff --git a/lucene/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/src/test/org/apache/lucene/search/TestTermScorer.java index 53900f67b84..a2d3a5d8c0a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTermScorer.java +++ b/lucene/src/test/org/apache/lucene/search/TestTermScorer.java @@ -21,14 +21,17 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; public class TestTermScorer extends LuceneTestCase { protected Directory directory; @@ -44,7 +47,7 @@ public class TestTermScorer extends LuceneTestCase { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < values.length; i++) { Document doc = new Document(); doc @@ -54,7 +57,7 @@ public class TestTermScorer extends LuceneTestCase { } indexReader = new SlowMultiReaderWrapper(writer.getReader()); writer.close(); - indexSearcher = new IndexSearcher(indexReader); + indexSearcher = newSearcher(indexReader); } @Override @@ -70,8 +73,8 @@ public class TestTermScorer extends LuceneTestCase { TermQuery termQuery = new TermQuery(allTerm); Weight weight = termQuery.weight(indexSearcher); - - Scorer ts = weight.scorer(indexSearcher.getIndexReader(), true, true); + assertTrue(indexSearcher.getTopReaderContext().isAtomic); + Scorer ts = weight.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def().scoreDocsInOrder(true).topScorer(true)); // we have 2 documents with the term all in them, one document for all the // other values final List docs 
= new ArrayList(); @@ -97,8 +100,8 @@ public class TestTermScorer extends LuceneTestCase { } @Override - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; + public void setNextReader(AtomicReaderContext context) { + base = context.docBase; } @Override @@ -131,8 +134,8 @@ public class TestTermScorer extends LuceneTestCase { TermQuery termQuery = new TermQuery(allTerm); Weight weight = termQuery.weight(indexSearcher); - - Scorer ts = weight.scorer(indexSearcher.getIndexReader(), true, true); + assertTrue(indexSearcher.getTopReaderContext().isAtomic); + Scorer ts = weight.scorer((AtomicReaderContext) indexSearcher.getTopReaderContext(), ScorerContext.def().scoreDocsInOrder(true).topScorer(true)); assertTrue("next did not return a doc", ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue("score is not correct", ts.score() == 1.6931472f); @@ -149,8 +152,9 @@ public class TestTermScorer extends LuceneTestCase { TermQuery termQuery = new TermQuery(allTerm); Weight weight = termQuery.weight(indexSearcher); - - Scorer ts = weight.scorer(indexSearcher.getIndexReader(), true, true); + assertTrue(indexSearcher.getTopReaderContext().isAtomic); + + Scorer ts = weight.scorer((AtomicReaderContext) indexSearcher.getTopReaderContext(), ScorerContext.def().scoreDocsInOrder(true).topScorer(true)); assertTrue("Didn't skip", ts.advance(3) != DocIdSetIterator.NO_MORE_DOCS); // The next doc should be doc 5 assertTrue("doc should be number 5", ts.docID() == 5); diff --git a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java index 0c1cf1f783a..2b4032d6c5e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java +++ b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java @@ -42,7 +42,7 @@ public class TestTermVectors extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(MockTokenizer.SIMPLE, true)); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newInOrderLogMergePolicy())); //writer.setUseCompoundFile(true); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { @@ -71,7 +71,7 @@ public class TestTermVectors extends LuceneTestCase { } reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override @@ -239,14 +239,14 @@ public class TestTermVectors extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)) - .setOpenMode(OpenMode.CREATE)); + .setOpenMode(OpenMode.CREATE).setMergePolicy(newInOrderLogMergePolicy())); writer.addDocument(testDoc1); writer.addDocument(testDoc2); writer.addDocument(testDoc3); writer.addDocument(testDoc4); IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher knownSearcher = new IndexSearcher(reader); + IndexSearcher knownSearcher = newSearcher(reader); FieldsEnum fields = MultiFields.getFields(knownSearcher.reader).iterator(); DocsEnum docs = null; @@ -378,7 +378,7 @@ public class TestTermVectors extends LuceneTestCase { } IndexReader reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); Query query = new 
TermQuery(new Term("field", "hundred")); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -414,7 +414,7 @@ public class TestTermVectors extends LuceneTestCase { IndexReader reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); Query query = new TermQuery(new Term("field", "one")); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; diff --git a/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java b/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java index cb9f13f10a1..5a57d54b64b 100755 --- a/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java +++ b/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.*; import java.util.Random; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import java.io.IOException; public class TestThreadSafe extends LuceneTestCase { @@ -34,16 +35,16 @@ public class TestThreadSafe extends LuceneTestCase { IndexReader ir1; - String failure=null; - - class Thr extends Thread { final int iter; final Random rand; + final AtomicBoolean failed; + // pass in random in case we want to make things reproducable - public Thr(int iter, Random rand) { + public Thr(int iter, Random rand, AtomicBoolean failed) { this.iter = iter; this.rand = rand; + this.failed = failed; } @Override @@ -61,8 +62,8 @@ public class TestThreadSafe extends LuceneTestCase { } } catch (Throwable th) { - failure=th.toString(); - fail(failure); + failed.set(true); + throw new RuntimeException(th); } } @@ -124,16 +125,15 @@ public class TestThreadSafe extends LuceneTestCase { void doTest(int iter, int nThreads) throws Exception { Thr[] tarr = new Thr[nThreads]; + AtomicBoolean failed = new AtomicBoolean(); for (int i=0; i doSearch(int numResults) throws IOException { Query q = new MatchAllDocsQuery(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); TopDocsCollector tdc = new MyTopsDocCollector(numResults); searcher.search(q, tdc); searcher.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestTopScoreDocCollector.java b/lucene/src/test/org/apache/lucene/search/TestTopScoreDocCollector.java index 036a211dccd..102dce579e3 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTopScoreDocCollector.java +++ b/lucene/src/test/org/apache/lucene/search/TestTopScoreDocCollector.java @@ -47,7 +47,7 @@ public class TestTopScoreDocCollector extends LuceneTestCase { // the clause instead of BQ. 
bq.setMinimumNumberShouldMatch(1); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); for (int i = 0; i < inOrder.length; i++) { TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]); assertEquals("org.apache.lucene.search.TopScoreDocCollector$" + actualTSDCClass[i], tdc.getClass().getName()); diff --git a/lucene/src/test/org/apache/lucene/search/TestWildcard.java b/lucene/src/test/org/apache/lucene/search/TestWildcard.java index 56e2825e684..22f4cc52cd2 100644 --- a/lucene/src/test/org/apache/lucene/search/TestWildcard.java +++ b/lucene/src/test/org/apache/lucene/search/TestWildcard.java @@ -298,7 +298,9 @@ public class TestWildcard // prepare the index Directory dir = newDirectory(); - RandomIndexWriter iw = new RandomIndexWriter(random, dir); + RandomIndexWriter iw = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()) + .setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < docs.length; i++) { Document doc = new Document(); doc.add(newField(field,docs[i],Store.NO,Index.ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java b/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java index bb07c16549d..cd685ddaa7c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java @@ -61,7 +61,7 @@ public class TestWildcardRandom extends LuceneTestCase { } reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java b/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java index 7556065071b..fad6f63c3f2 100644 --- a/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java +++ b/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java @@ -22,12 +22,13 @@ import java.lang.reflect.Method; import java.util.HashSet; import java.util.Set; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache.*; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; @@ -53,6 +54,7 @@ public class TestEntryCreators extends LuceneTestCase { this.parser = parser; values = new Number[NUM_DOCS]; } + @Override public String toString() { return field; @@ -65,7 +67,7 @@ public class TestEntryCreators extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); typeTests = new NumberTypeTester[] { new NumberTypeTester( "theRandomByte", "getBytes", ByteValuesCreator.class, ByteParser.class ), diff --git a/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java b/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java index 4baf41de416..15f3d046767 100755 --- 
a/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java +++ b/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java @@ -99,7 +99,7 @@ public class FunctionTestSetup extends LuceneTestCase { // prepare a small index with just a few documents. dir = newDirectory(); anlzr = new MockAnalyzer(); - IndexWriterConfig iwc = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr); + IndexWriterConfig iwc = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setMergePolicy(newInOrderLogMergePolicy()); if (doMultiSegment) { iwc.setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 7)); } diff --git a/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java b/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java index a85f040352c..96f5032c629 100644 --- a/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java +++ b/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.function; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldCache; import java.io.IOException; @@ -82,7 +83,7 @@ final class JustCompileSearchFunction { } @Override - public DocValues getValues(IndexReader reader) throws IOException { + public DocValues getValues(AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java b/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java index 8ff2af949d9..a90be6ec36c 100755 --- a/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java +++ b/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java @@ -19,12 +19,13 @@ package org.apache.lucene.search.function; import java.util.HashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.ReaderUtil; import org.junit.Test; /** @@ -185,12 +186,12 @@ public class TestFieldScoreQuery extends FunctionTestSetup { FieldScoreQuery q = new FieldScoreQuery(field,tp); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; assertEquals("All docs should be matched!",N_DOCS,h.length); - IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); - for (int j = 0; j < readers.length; j++) { - IndexReader reader = readers[j]; + AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext()); + for (int j = 0; j < leaves.length; j++) { + AtomicReaderContext leaf = leaves[j]; try { if (i == 0) { - innerArray[j] = q.valSrc.getValues(reader).getInnerArray(); + innerArray[j] = q.valSrc.getValues(leaf).getInnerArray(); log(i + ". compare: " + innerArray[j].getClass() + " to " + expectedArrayTypes.get(tp).getClass()); assertEquals( @@ -198,9 +199,9 @@ public class TestFieldScoreQuery extends FunctionTestSetup { innerArray[j].getClass(), expectedArrayTypes.get(tp).getClass()); } else { log(i + ". 
compare: " + innerArray[j] + " to " - + q.valSrc.getValues(reader).getInnerArray()); + + q.valSrc.getValues(leaf).getInnerArray()); assertSame("field values should be cached and reused!", innerArray[j], - q.valSrc.getValues(reader).getInnerArray()); + q.valSrc.getValues(leaf).getInnerArray()); } } catch (UnsupportedOperationException e) { if (!warned) { @@ -217,15 +218,15 @@ public class TestFieldScoreQuery extends FunctionTestSetup { FieldScoreQuery q = new FieldScoreQuery(field,tp); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; assertEquals("All docs should be matched!",N_DOCS,h.length); - IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); - for (int j = 0; j < readers.length; j++) { - IndexReader reader = readers[j]; + AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext()); + for (int j = 0; j < leaves.length; j++) { + AtomicReaderContext leaf = leaves[j]; try { log("compare: " + innerArray + " to " - + q.valSrc.getValues(reader).getInnerArray()); + + q.valSrc.getValues(leaf).getInnerArray()); assertNotSame( "cached field values should not be reused if reader as changed!", - innerArray, q.valSrc.getValues(reader).getInnerArray()); + innerArray, q.valSrc.getValues(leaf).getInnerArray()); } catch (UnsupportedOperationException e) { if (!warned) { System.err.println("WARNING: " + testName() diff --git a/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java b/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java index 8fb7eda1484..07b07f1c317 100644 --- a/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java +++ b/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java @@ -18,8 +18,9 @@ package org.apache.lucene.search.function; */ import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.*; +import org.apache.lucene.util.ReaderUtil; import org.junit.Test; /** @@ -168,14 +169,14 @@ public class TestOrdValues extends FunctionTestSetup { ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; try { assertEquals("All docs should be matched!", N_DOCS, h.length); - IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext()); - for (IndexReader reader : readers) { + for (AtomicReaderContext leaf : leaves) { if (i == 0) { - innerArray = q.valSrc.getValues(reader).getInnerArray(); + innerArray = q.valSrc.getValues(leaf).getInnerArray(); } else { - log(i + ". compare: " + innerArray + " to " + q.valSrc.getValues(reader).getInnerArray()); - assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(reader).getInnerArray()); + log(i + ". 
compare: " + innerArray + " to " + q.valSrc.getValues(leaf).getInnerArray()); + assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(leaf).getInnerArray()); } } } catch (UnsupportedOperationException e) { @@ -201,15 +202,15 @@ public class TestOrdValues extends FunctionTestSetup { q = new ValueSourceQuery(vs); h = s.search(q, null, 1000).scoreDocs; assertEquals("All docs should be matched!", N_DOCS, h.length); - IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext()); - for (IndexReader reader : readers) { + for (AtomicReaderContext leaf : leaves) { try { log("compare (should differ): " + innerArray + " to " - + q.valSrc.getValues(reader).getInnerArray()); + + q.valSrc.getValues(leaf).getInnerArray()); assertNotSame( "different values should be loaded for a different field!", - innerArray, q.valSrc.getValues(reader).getInnerArray()); + innerArray, q.valSrc.getValues(leaf).getInnerArray()); } catch (UnsupportedOperationException e) { if (!warned) { System.err.println("WARNING: " + testName() @@ -229,15 +230,15 @@ public class TestOrdValues extends FunctionTestSetup { q = new ValueSourceQuery(vs); h = s.search(q, null, 1000).scoreDocs; assertEquals("All docs should be matched!", N_DOCS, h.length); - readers = s.getIndexReader().getSequentialSubReaders(); + leaves = ReaderUtil.leaves(s.getTopReaderContext()); - for (IndexReader reader : readers) { + for (AtomicReaderContext leaf : leaves) { try { log("compare (should differ): " + innerArray + " to " - + q.valSrc.getValues(reader).getInnerArray()); + + q.valSrc.getValues(leaf).getInnerArray()); assertNotSame( "cached field values should not be reused if reader as changed!", - innerArray, q.valSrc.getValues(reader).getInnerArray()); + innerArray, q.valSrc.getValues(leaf).getInnerArray()); } catch (UnsupportedOperationException e) { if (!warned) { System.err.println("WARNING: " + testName() @@ -252,5 +253,14 @@ public class TestOrdValues extends FunctionTestSetup { private String testName() { return getClass().getName() + "." 
+ getName(); } + + // LUCENE-1250 + public void testEqualsNull() throws Exception { + OrdFieldSource ofs = new OrdFieldSource("f"); + assertFalse(ofs.equals(null)); + + ReverseOrdFieldSource rofs = new ReverseOrdFieldSource("f"); + assertFalse(rofs.equals(null)); + } } diff --git a/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java b/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java index 38b6e9cd91f..1cb688df2b9 100644 --- a/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java +++ b/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java @@ -22,6 +22,7 @@ import org.apache.lucene.store.*; import org.apache.lucene.search.*; import org.apache.lucene.analysis.*; import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.document.*; public class TestValueSource extends LuceneTestCase { @@ -39,17 +40,23 @@ public class TestValueSource extends LuceneTestCase { w.commit(); } - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); w.close(); assertTrue(r.getSequentialSubReaders().length > 1); ValueSource s1 = new IntFieldSource("field"); - DocValues v1 = s1.getValues(r); - DocValues v2 = new MultiValueSource(s1).getValues(r); - + AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext()); + DocValues v1 = null; + DocValues v2 = new MultiValueSource(s1).getValues(r.getTopReaderContext()); + int leafOrd = -1; for(int i=0;i + * NOTE: This should be used for testing purposes only + * @lucene.internal + */ +public class MultiSpansWrapper extends Spans { // can't be package private due to payloads + + private SpanQuery query; + private AtomicReaderContext[] leaves; + private int leafOrd = 0; + private Spans current; + + private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) { + this.query = query; + this.leaves = leaves; + + } + + public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException { + AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext); + if(leaves.length == 1) { + return query.getSpans(leaves[0]); + } + return new MultiSpansWrapper(leaves, query); + } + + @Override + public boolean next() throws IOException { + if (leafOrd >= leaves.length) { + return false; + } + if (current == null) { + current = query.getSpans(leaves[leafOrd]); + } + while(true) { + if (current.next()) { + return true; + } + if (++leafOrd < leaves.length) { + current = query.getSpans(leaves[leafOrd]); + } else { + current = null; + break; + } + } + return false; + } + + @Override + public boolean skipTo(int target) throws IOException { + if (leafOrd >= leaves.length) { + return false; + } + + int subIndex = ReaderUtil.subIndex(target, leaves); + assert subIndex >= leafOrd; + if (subIndex != leafOrd) { + current = query.getSpans(leaves[subIndex]); + leafOrd = subIndex; + } else if (current == null) { + current = query.getSpans(leaves[leafOrd]); + } + while (true) { + if (current.skipTo(target - leaves[leafOrd].docBase)) { + return true; + } + if (++leafOrd < leaves.length) { + current = query.getSpans(leaves[leafOrd]); + } else { + current = null; + break; + } + } + + return false; + } + + @Override + public int doc() { + if (current == null) { + return DocsEnum.NO_MORE_DOCS; + } + return current.doc() + leaves[leafOrd].docBase; + } + + @Override + public int start() { + if (current == null) { + return DocsEnum.NO_MORE_DOCS; + } + return current.start(); + } + + 
@Override + public int end() { + if (current == null) { + return DocsEnum.NO_MORE_DOCS; + } + return current.end(); + } + + @Override + public Collection getPayload() throws IOException { + if (current == null) { + return Collections.emptyList(); + } + return current.getPayload(); + } + + @Override + public boolean isPayloadAvailable() { + if (current == null) { + return false; + } + return current.isPayloadAvailable(); + } + +} diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java index 40f792fb26a..b04f96ae333 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -30,7 +30,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -70,7 +69,7 @@ public class TestBasics extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, true)) - .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newInOrderLogMergePolicy())); //writer.infoStream = System.out; for (int i = 0; i < 2000; i++) { Document doc = new Document(); @@ -78,7 +77,7 @@ public class TestBasics extends LuceneTestCase { writer.addDocument(doc); } reader = writer.getReader(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); writer.close(); } @@ -176,6 +175,27 @@ public class TestBasics extends LuceneTestCase { QueryUtils.check(term2); QueryUtils.checkUnequal(term1,term2); } + + public void testSpanTermQuery() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy")); + checkHits(term1, new int[] + { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, + 171, 172, 173, 174, 175, 176, 177, 178, 179, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 370, 371, 372, 373, 374, 375, 376, 377, 378, + 379, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 570, 571, 572, + 573, 574, 575, 576, 577, 578, 579, 670, 671, 672, 673, 674, 675, 676, + 677, 678, 679, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 870, + 871, 872, 873, 874, 875, 876, 877, 878, 879, 970, 971, 972, 973, 974, + 975, 976, 977, 978, 979, 1070, 1071, 1072, 1073, 1074, 1075, 1076, + 1077, 1078, 1079, 1170, 1270, 1370, 1470, 1570, 1670, 1770, 1870, 1970, + 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1271, 1272, 1273, + 1274, 1275, 1276, 1277, 1278, 1279, 1371, 1372, 1373, 1374, 1375, 1376, + 1377, 1378, 1379, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, + 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1671, 1672, 1673, + 1674, 1675, 1676, 1677, 1678, 1679, 1771, 1772, 1773, 1774, 1775, 1776, + 1777, 1778, 1779, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, + 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979 }); + } @Test public void testSpanNearUnordered() throws Exception { @@ -522,8 +542,8 @@ public class TestBasics extends LuceneTestCase { public void testSpansSkipTo() throws Exception { SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy")); SpanTermQuery t2 = 
new SpanTermQuery(new Term("field", "seventy")); - Spans s1 = t1.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); - Spans s2 = t2.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1); + Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2); assertTrue(s1.next()); assertTrue(s2.next()); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index a9fe8dd65f9..f86aea3b4b2 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -20,11 +20,11 @@ package org.apache.lucene.search.spans; import java.util.HashSet; import java.util.Set; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.IndexSearcher; @@ -55,7 +55,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); writer.addDocument(doc(new Field[] { field("id", "0") , @@ -112,7 +112,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { field("last", "jones") })); reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override @@ -254,7 +254,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { SpanQuery q = new SpanOrQuery(q1, new FieldMaskingSpanQuery(q2, "gender")); check(q, new int[] { 0, 1, 2, 3, 4 }); - Spans span = q.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.next()); assertEquals(s(0,0,1), s(span)); @@ -295,8 +295,8 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { check(qA, new int[] { 0, 1, 2, 4 }); check(qB, new int[] { 0, 1, 2, 4 }); - Spans spanA = qA.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); - Spans spanB = qB.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans spanA = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qA); + Spans spanB = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), qB); while (spanA.next()) { assertTrue("spanB not still going", spanB.next()); @@ -316,7 +316,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { new FieldMaskingSpanQuery(qB, "id") }, -1, false ); check(q, new int[] { 0, 1, 2, 3 }); - Spans span = q.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.next()); assertEquals(s(0,0,1), s(span)); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 
d5b6b406a05..8316ff8d858 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -21,7 +21,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; @@ -30,8 +31,10 @@ import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; public class TestNearSpansOrdered extends LuceneTestCase { protected IndexSearcher searcher; @@ -54,15 +57,15 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } - reader = new SlowMultiReaderWrapper(writer.getReader()); + reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } protected String[] docFields = { @@ -100,7 +103,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testNearSpansNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.next()); assertEquals(s(0,0,3), s(span)); assertEquals(true, span.next()); @@ -115,7 +118,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { */ public void testNearSpansSkipToLikeNext() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.skipTo(0)); assertEquals(s(0,0,3), s(span)); assertEquals(true, span.skipTo(1)); @@ -125,7 +128,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testNearSpansNextThenSkipTo() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.next()); assertEquals(s(0,0,3), s(span)); assertEquals(true, span.skipTo(1)); @@ -135,7 +138,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testNearSpansNextThenSkipPast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.next()); assertEquals(s(0,0,3), s(span)); 
assertEquals(false, span.skipTo(2)); @@ -143,20 +146,20 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testNearSpansSkipPast() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(false, span.skipTo(2)); } public void testNearSpansSkipTo0() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.skipTo(0)); assertEquals(s(0,0,3), s(span)); } public void testNearSpansSkipTo1() throws Exception { SpanNearQuery q = makeQuery(); - Spans span = q.getSpans(searcher.getIndexReader()); + Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q); assertEquals(true, span.skipTo(1)); assertEquals(s(1,0,4), s(span)); } @@ -168,7 +171,9 @@ public class TestNearSpansOrdered extends LuceneTestCase { public void testSpanNearScorerSkipTo1() throws Exception { SpanNearQuery q = makeQuery(); Weight w = q.weight(searcher); - Scorer s = w.scorer(searcher.getIndexReader(), true, false); + ReaderContext topReaderContext = searcher.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + Scorer s = w.scorer(leaves[0], ScorerContext.def()); assertEquals(1, s.advance(1)); } /** @@ -177,7 +182,10 @@ public class TestNearSpansOrdered extends LuceneTestCase { */ public void testSpanNearScorerExplain() throws Exception { SpanNearQuery q = makeQuery(); - Explanation e = q.weight(searcher).explain(searcher.getIndexReader(), 1); + ReaderContext topReaderContext = searcher.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + + Explanation e = q.weight(searcher).explain(leaves[0], 1); assertTrue("Scorer explanation value for doc#1 isn't positive: " + e.toString(), 0.0f < e.getValue()); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index bfbd224ca0b..2ae7efd63a3 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -35,12 +35,11 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Payload; import org.apache.lucene.index.Term; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.payloads.PayloadHelper; @@ -51,7 +50,7 @@ import org.apache.lucene.util.LuceneTestCase; public class TestPayloadSpans extends LuceneTestCase { private IndexSearcher searcher; - private Similarity similarity = new DefaultSimilarity(); + private SimilarityProvider similarity = new DefaultSimilarity(); protected IndexReader indexReader; private IndexReader closeIndexReader; private Directory directory; @@ -68,12 +67,12 @@ public class TestPayloadSpans extends LuceneTestCase { SpanTermQuery stq; Spans spans; stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); 
- spans = stq.getSpans(indexReader); + spans = MultiSpansWrapper.wrap(indexReader.getTopReaderContext(), stq); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 1, 1, 1); stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); - spans = stq.getSpans(indexReader); + spans = MultiSpansWrapper.wrap(indexReader.getTopReaderContext(), stq); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 0, 0, 0); } @@ -84,7 +83,7 @@ public class TestPayloadSpans extends LuceneTestCase { SpanFirstQuery sfq; match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); sfq = new SpanFirstQuery(match, 2); - Spans spans = sfq.getSpans(indexReader); + Spans spans = MultiSpansWrapper.wrap(indexReader.getTopReaderContext(), sfq); checkSpans(spans, 109, 1, 1, 1); //Test more complicated subclause SpanQuery[] clauses = new SpanQuery[2]; @@ -92,11 +91,11 @@ public class TestPayloadSpans extends LuceneTestCase { clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); match = new SpanNearQuery(clauses, 0, true); sfq = new SpanFirstQuery(match, 2); - checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader.getTopReaderContext(), sfq), 100, 2, 1, 1); match = new SpanNearQuery(clauses, 0, false); sfq = new SpanFirstQuery(match, 2); - checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); + checkSpans(MultiSpansWrapper.wrap(indexReader.getTopReaderContext(), sfq), 100, 2, 1, 1); } @@ -111,7 +110,7 @@ public class TestPayloadSpans extends LuceneTestCase { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); doc.add(newField(PayloadHelper.FIELD, "one two three one four three", @@ -119,8 +118,9 @@ public class TestPayloadSpans extends LuceneTestCase { writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); + - checkSpans(snq.getSpans(new SlowMultiReaderWrapper(reader)), 1,new int[]{2}); + checkSpans(MultiSpansWrapper.wrap(reader.getTopReaderContext(), snq), 1,new int[]{2}); reader.close(); directory.close(); } @@ -130,7 +130,7 @@ public class TestPayloadSpans extends LuceneTestCase { Spans spans; IndexSearcher searcher = getSearcher(); stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); - spans = stq.getSpans(searcher.getIndexReader()); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), stq); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 0, null); @@ -141,7 +141,7 @@ public class TestPayloadSpans extends LuceneTestCase { clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); - spans = spanNearQuery.getSpans(searcher.getIndexReader()); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); @@ -152,8 +152,8 @@ public class TestPayloadSpans extends LuceneTestCase { spanNearQuery = new SpanNearQuery(clauses, 6, true); - - spans = spanNearQuery.getSpans(searcher.getIndexReader()); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), spanNearQuery); + assertTrue("spans is null and it shouldn't 
be", spans != null); checkSpans(spans, 1, new int[]{3}); @@ -175,9 +175,10 @@ public class TestPayloadSpans extends LuceneTestCase { // yy within 6 of xx within 6 of rr - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); + searcher.close(); closeIndexReader.close(); directory.close(); } @@ -206,10 +207,11 @@ public class TestPayloadSpans extends LuceneTestCase { clauses3[1] = snq; SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 1, new int[]{3}); + searcher.close(); closeIndexReader.close(); directory.close(); } @@ -244,9 +246,10 @@ public class TestPayloadSpans extends LuceneTestCase { SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{8, 8}); + searcher.close(); closeIndexReader.close(); directory.close(); } @@ -261,15 +264,15 @@ public class TestPayloadSpans extends LuceneTestCase { doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); writer.addDocument(doc); - IndexReader reader = new SlowMultiReaderWrapper(writer.getReader()); - IndexSearcher is = new IndexSearcher(reader); + IndexReader reader = writer.getReader(); + IndexSearcher is = newSearcher(reader); writer.close(); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); - Spans spans = snq.getSpans(is.getIndexReader()); + Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet(); @@ -285,6 +288,7 @@ public class TestPayloadSpans extends LuceneTestCase { assertEquals(2, payloadSet.size()); assertTrue(payloadSet.contains("a:Noise:10")); assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); reader.close(); directory.close(); } @@ -298,15 +302,15 @@ public class TestPayloadSpans extends LuceneTestCase { Document doc = new Document(); doc.add(new Field("content", new StringReader("a b a d k f a h i k a k"))); writer.addDocument(doc); - IndexReader reader = new SlowMultiReaderWrapper(writer.getReader()); - IndexSearcher is = new IndexSearcher(reader); + IndexReader reader = writer.getReader(); + IndexSearcher is = newSearcher(reader); writer.close(); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = snq.getSpans(is.getIndexReader()); + Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet(); @@ -321,6 +325,7 @@ public class TestPayloadSpans extends LuceneTestCase { assertEquals(2, payloadSet.size()); assertTrue(payloadSet.contains("a:Noise:10")); 
assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); reader.close(); directory.close(); } @@ -334,15 +339,15 @@ public class TestPayloadSpans extends LuceneTestCase { Document doc = new Document(); doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a"))); writer.addDocument(doc); - IndexReader reader = new SlowMultiReaderWrapper(writer.getReader()); - IndexSearcher is = new IndexSearcher(reader); + IndexReader reader = writer.getReader(); + IndexSearcher is = newSearcher(reader); writer.close(); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = snq.getSpans(is.getIndexReader()); + Spans spans = MultiSpansWrapper.wrap(is.getTopReaderContext(), snq); TopDocs topDocs = is.search(snq, 1); Set payloadSet = new HashSet(); @@ -363,6 +368,7 @@ public class TestPayloadSpans extends LuceneTestCase { } assertTrue(payloadSet.contains("a:Noise:10")); assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); reader.close(); directory.close(); } @@ -370,17 +376,17 @@ public class TestPayloadSpans extends LuceneTestCase { public void testPayloadSpanUtil() throws Exception { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); - IndexReader reader = new SlowMultiReaderWrapper(writer.getReader()); + IndexReader reader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); - PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getIndexReader()); + PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext()); Collection payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr"))); if(VERBOSE) @@ -389,6 +395,7 @@ public class TestPayloadSpans extends LuceneTestCase { if(VERBOSE) System.out.println(new String(bytes)); } + searcher.close(); reader.close(); directory.close(); } @@ -430,7 +437,7 @@ public class TestPayloadSpans extends LuceneTestCase { directory = newDirectory(); String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = null; for(int i = 0; i < docs.length; i++) { @@ -440,10 +447,10 @@ public class TestPayloadSpans extends LuceneTestCase { writer.addDocument(doc); } - closeIndexReader = new SlowMultiReaderWrapper(writer.getReader()); + closeIndexReader = writer.getReader(); writer.close(); - IndexSearcher searcher = new IndexSearcher(closeIndexReader); + IndexSearcher searcher = newSearcher(closeIndexReader); return searcher; } diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java 
b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java index b95e7719b76..583da5191ab 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java @@ -48,7 +48,7 @@ public class TestSpanFirstQuery extends LuceneTestCase { writer.addDocument(doc2); IndexReader reader = writer.getReader(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // user queries on "starts-with quick" SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java index 48c26c92da1..51ae833705a 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java @@ -53,7 +53,7 @@ public class TestSpanMultiTermQueryWrapper extends LuceneTestCase { iw.addDocument(doc); reader = iw.getReader(); iw.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java index ac96892d5a0..23cdf4786d9 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -20,22 +20,26 @@ package org.apache.lucene.search.spans; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Query; import org.apache.lucene.search.CheckHits; -import org.apache.lucene.search.Similarity; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Weight.ScorerContext; import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; + import java.io.IOException; public class TestSpans extends LuceneTestCase { @@ -49,7 +53,7 @@ public class TestSpans extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer= new RandomIndexWriter(random, directory); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED)); @@ -57,7 +61,7 @@ public class TestSpans extends LuceneTestCase { } reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + 
searcher = newSearcher(reader); } @Override @@ -195,7 +199,7 @@ public class TestSpans extends LuceneTestCase { makeSpanTermQuery("t3") }, slop, ordered); - Spans spans = snq.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); assertTrue("first range", spans.next()); assertEquals("first doc", 11, spans.doc()); @@ -221,7 +225,7 @@ public class TestSpans extends LuceneTestCase { makeSpanTermQuery("u2") }, 0, false); - Spans spans = snq.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); assertTrue("Does not have next and it should", spans.next()); assertEquals("doc", 4, spans.doc()); assertEquals("start", 1, spans.start()); @@ -257,7 +261,7 @@ public class TestSpans extends LuceneTestCase { }, 1, false); - spans = snq.getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq); assertTrue("Does not have next and it should", spans.next()); assertEquals("doc", 4, spans.doc()); assertEquals("start", 0, spans.start()); @@ -315,7 +319,7 @@ public class TestSpans extends LuceneTestCase { for (int i = 0; i < terms.length; i++) { sqa[i] = makeSpanTermQuery(terms[i]); } - return (new SpanOrQuery(sqa)).getSpans(new SlowMultiReaderWrapper(searcher.getIndexReader())); + return MultiSpansWrapper.wrap(searcher.getTopReaderContext(), new SpanOrQuery(sqa)); } private void tstNextSpans(Spans spans, int doc, int start, int end) @@ -400,33 +404,43 @@ public class TestSpans extends LuceneTestCase { public void testSpanScorerZeroSloppyFreq() throws Exception { boolean ordered = true; int slop = 1; - - final Similarity sim = new DefaultSimilarity() { - @Override - public float sloppyFreq(int distance) { - return 0.0f; + ReaderContext topReaderContext = searcher.getTopReaderContext(); + AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + int subIndex = ReaderUtil.subIndex(11, leaves); + for (int i = 0; i < leaves.length; i++) { + + + final SimilarityProvider sim = new DefaultSimilarity() { + @Override + public float sloppyFreq(int distance) { + return 0.0f; + } + }; + + final SimilarityProvider oldSim = searcher.getSimilarityProvider(); + Scorer spanScorer; + try { + searcher.setSimilarityProvider(sim); + SpanNearQuery snq = new SpanNearQuery( + new SpanQuery[] { + makeSpanTermQuery("t1"), + makeSpanTermQuery("t2") }, + slop, + ordered); + + spanScorer = snq.weight(searcher).scorer(leaves[i], ScorerContext.def()); + } finally { + searcher.setSimilarityProvider(oldSim); } - }; - - SpanNearQuery snq = new SpanNearQuery( - new SpanQuery[] { - makeSpanTermQuery("t1"), - makeSpanTermQuery("t2") }, - slop, - ordered) { - @Override - public Similarity getSimilarity(IndexSearcher s) { - return sim; + if (i == subIndex) { + assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertEquals("first doc number", spanScorer.docID() + leaves[i].docBase, 11); + float score = spanScorer.score(); + assertTrue("first doc score should be zero, " + score, score == 0.0f); + } else { + assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); } - }; - - Scorer spanScorer = snq.weight(searcher).scorer(new SlowMultiReaderWrapper(searcher.getIndexReader()), true, false); - - assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertEquals("first doc number", spanScorer.docID(), 11); - 
float score = spanScorer.score(); - assertTrue("first doc score should be zero, " + score, score == 0.0f); - assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); + } } // LUCENE-1404 @@ -472,7 +486,7 @@ public class TestSpans extends LuceneTestCase { // Get searcher final IndexReader reader = IndexReader.open(dir, true); - final IndexSearcher searcher = new IndexSearcher(reader); + final IndexSearcher searcher = newSearcher(reader); // Control (make sure docs indexed) assertEquals(2, hitCount(searcher, "the")); @@ -485,6 +499,7 @@ public class TestSpans extends LuceneTestCase { searcher.search(createSpan(0, true, new SpanQuery[] {createSpan(4, false, "chased", "cat"), createSpan("ate")}), 10).totalHits); + searcher.close(); reader.close(); dir.close(); } diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java index c1ed5283164..e3e2e6774e0 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java @@ -57,15 +57,16 @@ public class TestSpansAdvanced extends LuceneTestCase { // create test index mDirectory = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random, - mDirectory, new MockAnalyzer(MockTokenizer.SIMPLE, true, - MockTokenFilter.ENGLISH_STOPSET, true)); + mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(MockTokenizer.SIMPLE, true, + MockTokenFilter.ENGLISH_STOPSET, true)).setMergePolicy(newInOrderLogMergePolicy())); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); addDocument(writer, "4", "I think it should work."); reader = writer.getReader(); writer.close(); - searcher = new IndexSearcher(reader); + searcher = newSearcher(reader); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java index c5b187abb3b..6406bddf50c 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java @@ -48,7 +48,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced { final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)) - .setOpenMode(OpenMode.APPEND)); + .setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. 
Should it?"); addDocument(writer, "C", "It shouldn't."); @@ -57,7 +57,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced { writer.close(); // re-open the searcher since we added more docs - searcher2 = new IndexSearcher(reader2); + searcher2 = newSearcher(reader2); } @Override diff --git a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java index b91f5938d0c..4a4c2780c80 100755 --- a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java +++ b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -271,7 +271,7 @@ public class TestBufferedIndexInput extends LuceneTestCase { assertEquals(reader.docFreq(bbb), 37); dir.tweakBufferSizes(); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(new TermQuery(bbb), null, 1000).scoreDocs; dir.tweakBufferSizes(); assertEquals(35, hits.length); diff --git a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java index a5ab76eb65c..635970fd85c 100644 --- a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java +++ b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java @@ -39,8 +39,10 @@ public class TestFileSwitchDirectory extends LuceneTestCase { fileExtensions.add(IndexFileNames.FIELDS_EXTENSION); fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION); - Directory primaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); - Directory secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); + MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); + primaryDir.setCheckIndexOnClose(false); // only part of an index + MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); + secondaryDir.setCheckIndexOnClose(false); // only part of an index FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true); IndexWriter writer = new IndexWriter( @@ -49,7 +51,7 @@ public class TestFileSwitchDirectory extends LuceneTestCase { setMergePolicy(newLogMergePolicy(false)) ); TestIndexWriterReader.createIndexNoClose(true, "ram", writer); - IndexReader reader = IndexReader.open(writer); + IndexReader reader = IndexReader.open(writer, true); assertEquals(100, reader.maxDoc()); writer.commit(); // we should see only fdx,fdt files here diff --git a/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java index fc4d31e6dd3..14cf0fd856a 100644 --- a/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -20,6 +20,7 @@ package org.apache.lucene.store; import java.io.File; import java.util.Random; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; @@ -58,7 +59,7 @@ public class TestMultiMMap extends LuceneTestCase { // we will map a lot, try to turn on the unmap hack if (MMapDirectory.UNMAP_SUPPORTED) dir.setUseUnmap(true); - RandomIndexWriter writer = new RandomIndexWriter(random, dir); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); Document doc = new 
Document(); Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED); Field junk = newField("junk", "", Field.Store.YES, Field.Index.NOT_ANALYZED); diff --git a/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java b/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java index ac394cbb7ca..94f71aea46e 100644 --- a/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java +++ b/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java @@ -82,7 +82,7 @@ public class TestRAMDirectory extends LuceneTestCase { assertEquals(docsToAdd, reader.numDocs()); // open search zo check if all doc's are there - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = newSearcher(reader); // search for all documents for (int i = 0; i < docsToAdd; i++) { @@ -180,4 +180,22 @@ public class TestRAMDirectory extends LuceneTestCase { } dir.delete(); } + + // LUCENE-2852 + public void testSeekToEOFThenBack() throws Exception { + RAMDirectory dir = new RAMDirectory(); + + IndexOutput o = dir.createOutput("out"); + byte[] bytes = new byte[3*RAMInputStream.BUFFER_SIZE]; + o.writeBytes(bytes, 0, bytes.length); + o.close(); + + IndexInput i = dir.openInput("out"); + i.seek(2*RAMInputStream.BUFFER_SIZE-1); + i.seek(3*RAMInputStream.BUFFER_SIZE); + i.seek(RAMInputStream.BUFFER_SIZE); + i.readBytes(bytes, 0, 2*RAMInputStream.BUFFER_SIZE); + i.close(); + dir.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java b/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java index f72f02c1b6a..bf1c0d1934d 100644 --- a/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java +++ b/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java @@ -109,34 +109,6 @@ public class TestAttributeSource extends LuceneTestCase { assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); } - public void testToStringAndMultiAttributeImplementations() { - AttributeSource src = new AttributeSource(); - CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); - TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); - termAtt.append("TestTerm"); - typeAtt.setType("TestType"); - assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString()); - Iterator it = src.getAttributeImplsIterator(); - assertTrue("Iterator should have 2 attributes left", it.hasNext()); - assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next()); - assertTrue("Iterator should have 1 attributes left", it.hasNext()); - assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next()); - assertFalse("Iterator should have 0 attributes left", it.hasNext()); - - src = new AttributeSource(); - src.addAttributeImpl(new Token()); - // this should not add a new attribute as Token implements CharTermAttribute, too - termAtt = src.addAttribute(CharTermAttribute.class); - assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token); - // get the Token attribute and check, that it is the only one - it = src.getAttributeImplsIterator(); - Token tok = (Token) it.next(); - assertFalse("There should be only one attribute implementation instance", it.hasNext()); - - termAtt.setEmpty().append("TestTerm"); - assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString()); - } - public void testDefaultAttributeFactory() throws Exception { AttributeSource src = new 
AttributeSource(); diff --git a/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java b/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java index 553898a8a8c..2c82aea6ff3 100644 --- a/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java +++ b/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java @@ -40,6 +40,7 @@ public class TestBytesRefHash extends LuceneTestCase { /** */ + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java b/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java index 03a935a6fef..952c218de3f 100644 --- a/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java +++ b/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java @@ -145,14 +145,17 @@ public class TestDoubleBarrelLRUCache extends LuceneTestCase { this.value = value; } + @Override public boolean equals(Object other) { return this.value.equals(((CloneableObject) other).value); } + @Override public int hashCode() { return value.hashCode(); } + @Override public Object clone() { return new CloneableObject(value); } @@ -165,14 +168,17 @@ public class TestDoubleBarrelLRUCache extends LuceneTestCase { this.value = value; } + @Override public boolean equals(Object other) { return this.value.equals(((CloneableInteger) other).value); } + @Override public int hashCode() { return value.hashCode(); } + @Override public Object clone() { return new CloneableInteger(value); } diff --git a/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java b/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java index beac79df7c9..b6c1c4d4f9c 100644 --- a/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java +++ b/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java @@ -31,6 +31,7 @@ public class TestRecyclingByteBlockAllocator extends LuceneTestCase { /** */ + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java index 72a68709713..39dc27be1e2 100644 --- a/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java +++ b/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java @@ -59,11 +59,13 @@ public class TestFSTs extends LuceneTestCase { private MockDirectoryWrapper dir; + @Override public void setUp() throws IOException { dir = newDirectory(); dir.setPreventDoubleWrite(false); } + @Override public void tearDown() throws IOException { dir.close(); } @@ -944,7 +946,7 @@ public class TestFSTs extends LuceneTestCase { CodecProvider.getDefault().setDefaultFieldCodec("Standard"); } - final LineFileDocs docs = new LineFileDocs(false); + final LineFileDocs docs = new LineFileDocs(random); final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
100 : 1; final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64); final File tempDir = _TestUtil.getTempDir("fstlines"); @@ -958,7 +960,7 @@ public class TestFSTs extends LuceneTestCase { writer.addDocument(doc); docCount++; } - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); writer.close(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean()); Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs); @@ -974,6 +976,9 @@ public class TestFSTs extends LuceneTestCase { Terms terms = MultiFields.getTerms(r, "body"); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); + if (VERBOSE) { + System.out.println("TEST: got termsEnum=" + termsEnum); + } BytesRef term; int ord = 0; while((term = termsEnum.next()) != null) { @@ -981,6 +986,9 @@ public class TestFSTs extends LuceneTestCase { try { termsEnum.ord(); } catch (UnsupportedOperationException uoe) { + if (VERBOSE) { + System.out.println("TEST: codec doesn't support ord; FST stores docFreq"); + } storeOrd = false; } } @@ -1022,6 +1030,9 @@ public class TestFSTs extends LuceneTestCase { for(int nextIter=0;nextIter<10;nextIter++) { if (VERBOSE) { System.out.println("TEST: next"); + if (storeOrd) { + System.out.println(" ord=" + termsEnum.ord()); + } } if (termsEnum.next() != null) { if (VERBOSE) { diff --git a/modules/analysis/CHANGES.txt b/modules/analysis/CHANGES.txt index 59dbc6024d0..69c72793c10 100644 --- a/modules/analysis/CHANGES.txt +++ b/modules/analysis/CHANGES.txt @@ -9,12 +9,14 @@ API Changes * LUCENE-2413: Removed the AnalyzerUtil in common/miscellaneous. (Robert Muir) - * LUCENE-2167,LUCENE-2699,LUCENE-2763: StandardTokenizer/Analyzer in - common/standard/ now implement the Word Break rules from the Unicode 6.0.0 - Text Segmentation algorithm (UAX#29). + * LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer + in common/standard/ now implement the Word Break rules from the Unicode 6.0.0 + Text Segmentation algorithm (UAX#29), covering the full range of Unicode code + points, including values from U+FFFF to U+10FFFF - ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer - implementation and behavior. + ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/ + Analyzer implementation and behavior. Only the Unicode Basic Multilingual + Plane (code points from U+0000 to U+FFFF) is covered. UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the relevant RFCs, in addition to implementing the UAX#29 Word Break rules. @@ -78,6 +80,9 @@ New Features - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader + * SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of + /something, /something/something, /something/something/else. 
(Ryan McKinley, Koji Sekiguchi) + Build * LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the diff --git a/modules/analysis/common/build.xml b/modules/analysis/common/build.xml index 3a7d30334e7..75e93b58c57 100644 --- a/modules/analysis/common/build.xml +++ b/modules/analysis/common/build.xml @@ -25,12 +25,12 @@ - + @@ -38,8 +38,15 @@ - + + + + + + + diff --git a/modules/analysis/common/pom.xml.template b/modules/analysis/common/pom.xml.template deleted file mode 100644 index cd4478b2de9..00000000000 --- a/modules/analysis/common/pom.xml.template +++ /dev/null @@ -1,35 +0,0 @@ - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-analyzers-common - Lucene Analyzers - @version@ - Additional Analyzers - jar - diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java index 1b144b45918..5e35643c77d 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java @@ -82,7 +82,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase { private Set excltable = Collections.emptySet(); /** - * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). + * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}). */ public BrazilianAnalyzer(Version matchVersion) { this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java index 4ab01ab0d32..ff3f20fb8a1 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java @@ -101,6 +101,7 @@ public class HTMLStripCharFilter extends BaseCharFilter { if (len>0) { return pushed.charAt(len-1); } + numRead++; int ch = input.read(); push(ch); return ch; @@ -672,6 +673,7 @@ public class HTMLStripCharFilter extends BaseCharFilter { + @Override public int read() throws IOException { // TODO: Do we ever want to preserve CDATA sections? // where do we have to worry about them? 
@@ -740,6 +742,7 @@ public class HTMLStripCharFilter extends BaseCharFilter { } + @Override public int read(char cbuf[], int off, int len) throws IOException { int i=0; for (i=0; ioutput:"the-rain", "rain-in" ,"in-spain", "falls", "mainly" * */ + @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { State current = captureState(); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java index 0aba57fd08e..45b847a833e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java @@ -22,10 +22,9 @@ import java.util.Arrays; import java.util.List; import java.util.Set; -import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.Version; @@ -42,14 +41,10 @@ import org.apache.lucene.util.Version; * increments are preserved * */ -public final class StopFilter extends TokenFilter { +public final class StopFilter extends FilteringTokenFilter { private final CharArraySet stopWords; - private boolean enablePositionIncrements = true; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - /** * Construct a token stream filtering the given input. If @@ -75,7 +70,7 @@ public final class StopFilter extends TokenFilter { */ public StopFilter(Version matchVersion, TokenStream input, Set stopWords, boolean ignoreCase) { - super(input); + super(true, input); this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase); } @@ -157,48 +152,8 @@ public final class StopFilter extends TokenFilter { * Returns the next input Token whose term() is not a stop word. */ @Override - public final boolean incrementToken() throws IOException { - // return the first non-stop word found - int skippedPositions = 0; - while (input.incrementToken()) { - if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) { - if (enablePositionIncrements) { - posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); - } - return true; - } - skippedPositions += posIncrAtt.getPositionIncrement(); - } - // reached EOS -- return false - return false; + protected boolean accept() throws IOException { + return !stopWords.contains(termAtt.buffer(), 0, termAtt.length()); } - /** - * @see #setEnablePositionIncrements(boolean) - */ - public boolean getEnablePositionIncrements() { - return enablePositionIncrements; - } - - /** - * If true, this StopFilter will preserve - * positions of the incoming tokens (ie, accumulate and - * set position increments of the removed stop tokens). - * Generally, true is best as it does not - * lose information (positions of the original tokens) - * during indexing. - * - * Default is true. - * - *

    When set, when a token is stopped - * (omitted), the position increment of the following - * token is incremented. - * - *

    NOTE: be sure to also - * set {@link QueryParser#setEnablePositionIncrements} if - * you use QueryParser to create queries. - */ - public void setEnablePositionIncrements(boolean enable) { - this.enablePositionIncrements = enable; - } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java index 89e58f4cf87..d23f759adc6 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java @@ -86,7 +86,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { private final Set stemExclusionTable; /** - * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}). + * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}). * * @param matchVersion Lucene version to match See * {@link above} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java index c1ed38acfa5..962f839d45c 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java @@ -32,6 +32,7 @@ public class PersianCharFilter extends CharFilter { super(in); } + @Override public int read(char[] cbuf, int off, int len) throws IOException { final int charsRead = super.read(cbuf, off, len); if (charsRead > 0) { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java new file mode 100644 index 00000000000..9a64f045d9e --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java @@ -0,0 +1,129 @@ +package org.apache.lucene.analysis.gl; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.analysis.util.WordlistLoader; +import org.apache.lucene.util.Version; + +/** + * {@link Analyzer} for Galician. + */ +public final class GalicianAnalyzer extends StopwordAnalyzerBase { + private final Set stemExclusionSet; + + /** File containing default Galician stopwords. */ + public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; + + /** + * Returns an unmodifiable instance of the default stop words set. + * @return default stop words set. + */ + public static Set getDefaultStopSet(){ + return DefaultSetHolder.DEFAULT_STOP_SET; + } + + /** + * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class + * accesses the static final set the first time.; + */ + private static class DefaultSetHolder { + static final Set DEFAULT_STOP_SET; + + static { + try { + DEFAULT_STOP_SET = WordlistLoader.getWordSet(GalicianAnalyzer.class, + DEFAULT_STOPWORD_FILE); + } catch (IOException ex) { + // default set should always be present as it is part of the + // distribution (JAR) + throw new RuntimeException("Unable to load default stopword set"); + } + } + } + + /** + * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + */ + public GalicianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + */ + public GalicianAnalyzer(Version matchVersion, Set stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + } + + /** + * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is + * provided this analyzer will add a {@link KeywordMarkerFilter} before + * stemming. + * + * @param matchVersion lucene compatibility version + * @param stopwords a stopword set + * @param stemExclusionSet a set of terms not to be stemmed + */ + public GalicianAnalyzer(Version matchVersion, Set stopwords, Set stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); + } + + /** + * Creates a + * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents} + * which tokenizes all the text in the provided {@link Reader}. + * + * @return A + * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents} + * built from an {@link StandardTokenizer} filtered with + * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} + * , {@link KeywordMarkerFilter} if a stem exclusion set is + * provided and {@link GalicianStemFilter}. 
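For reference, a minimal usage sketch of the analyzer chain described above (illustrative only, not part of this patch; the field name and sample text are arbitrary):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.gl.GalicianAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class GalicianAnalyzerExample {
      public static void main(String[] args) throws Exception {
        // Default stop set, no stem exclusions.
        GalicianAnalyzer analyzer = new GalicianAnalyzer(Version.LUCENE_31);
        TokenStream ts = analyzer.tokenStream("body", new StringReader("os cans pequenos"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // Prints the lowercased, stop-filtered, stemmed terms.
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }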
+ */ + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + final Tokenizer source = new StandardTokenizer(matchVersion, reader); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); + if(!stemExclusionSet.isEmpty()) + result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new GalicianStemFilter(result); + return new TokenStreamComponents(source, result); + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java new file mode 100644 index 00000000000..38bf6a181ec --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java @@ -0,0 +1,60 @@ +package org.apache.lucene.analysis.gl; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link GalicianStemmer} to stem + * Galician words. + *

    + * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
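To illustrate the note above (a sketch under assumed names, not part of this patch): marking a term as a keyword before the stem filter keeps it from being stemmed.

    import java.io.StringReader;
    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.gl.GalicianStemFilter;
    import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class KeywordMarkerExample {
      public static void main(String[] args) throws Exception {
        // Terms in this set get the KeywordAttribute and are passed through unstemmed.
        CharArraySet keywords = new CharArraySet(Version.LUCENE_31, Arrays.asList("galego"), false);
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_31, new StringReader("galego galegos"));
        ts = new KeywordMarkerFilter(ts, keywords); // marks "galego" as a keyword
        ts = new GalicianStemFilter(ts);            // skips tokens marked as keywords
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString());
        }
        ts.close();
      }
    }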

    + */ +public final class GalicianStemFilter extends TokenFilter { + private final GalicianStemmer stemmer = new GalicianStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public GalicianStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + // this stemmer increases word length by 1: worst case '*çom' -> '*ción' + final int len = termAtt.length(); + final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java new file mode 100644 index 00000000000..ba2ffb32964 --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java @@ -0,0 +1,83 @@ +package org.apache.lucene.analysis.gl; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Map; + +import org.apache.lucene.analysis.pt.RSLPStemmerBase; + +/** + * Galician stemmer implementing "Regras do lematizador para o galego". 
+ * + * @see RSLPStemmerBase + * @see Description of rules + */ +public class GalicianStemmer extends RSLPStemmerBase { + private static final Step plural, unification, adverb, augmentative, noun, verb, vowel; + + static { + Map steps = parse(GalicianStemmer.class, "galician.rslp"); + plural = steps.get("Plural"); + unification = steps.get("Unification"); + adverb = steps.get("Adverb"); + augmentative = steps.get("Augmentative"); + noun = steps.get("Noun"); + verb = steps.get("Verb"); + vowel = steps.get("Vowel"); + } + + /** + * @param s buffer, oversized to at least len+1 + * @param len initial valid length of buffer + * @return new valid length, stemmed + */ + public int stem(char s[], int len) { + assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1"; + + len = plural.apply(s, len); + len = unification.apply(s, len); + len = adverb.apply(s, len); + + int oldlen; + do { + oldlen = len; + len = augmentative.apply(s, len); + } while (len != oldlen); + + oldlen = len; + len = noun.apply(s, len); + if (len == oldlen) { /* suffix not removed */ + len = verb.apply(s, len); + } + + len = vowel.apply(s, len); + + // RSLG accent removal + for (int i = 0; i < len; i++) + switch(s[i]) { + case 'á': s[i] = 'a'; break; + case 'é': + case 'ê': s[i] = 'e'; break; + case 'í': s[i] = 'i'; break; + case 'ó': s[i] = 'o'; break; + case 'ú': s[i] = 'u'; break; + } + + return len; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html new file mode 100644 index 00000000000..f6d977a77c4 --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html @@ -0,0 +1,22 @@ + + + + +Analyzer for Galician. 
+ + diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java index e488fe4dd46..935c96f5bb7 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; @@ -30,22 +31,19 @@ import org.apache.lucene.analysis.util.CharArraySet; * * @since solr 1.3 */ -public final class KeepWordFilter extends TokenFilter { +public final class KeepWordFilter extends FilteringTokenFilter { private final CharArraySet words; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** The words set passed to this constructor will be directly used by this filter * and should not be modified, */ - public KeepWordFilter(TokenStream in, CharArraySet words) { - super(in); + public KeepWordFilter(boolean enablePositionIncrements, TokenStream in, CharArraySet words) { + super(enablePositionIncrements, in); this.words = words; } @Override - public boolean incrementToken() throws IOException { - while (input.incrementToken()) { - if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true; - } - return false; + public boolean accept() throws IOException { + return words.contains(termAtt.buffer(), 0, termAtt.length()); } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java index b5fb812baca..7a55e32c53f 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java @@ -74,10 +74,12 @@ public final class KeywordMarkerFilter extends TokenFilter { @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0, - termAtt.length())); + if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) { + keywordAttr.setKeyword(true); + } return true; - } else + } else { return false; + } } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java index bfccddbeab4..3f36f2f48e2 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** @@ -29,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * Note: Length is calculated as the number of UTF-16 code units. *

    */ -public final class LengthFilter extends TokenFilter { +public final class LengthFilter extends FilteringTokenFilter { private final int min; private final int max; @@ -40,27 +41,15 @@ public final class LengthFilter extends TokenFilter { * Build a filter that removes words that are too long or too * short from the text. */ - public LengthFilter(TokenStream in, int min, int max) - { - super(in); + public LengthFilter(boolean enablePositionIncrements, TokenStream in, int min, int max) { + super(enablePositionIncrements, in); this.min = min; this.max = max; } - /** - * Returns the next input Token whose term() is the right len - */ @Override - public final boolean incrementToken() throws IOException { - // return the first non-stop word found - while (input.incrementToken()) { - int len = termAtt.length(); - if (len >= min && len <= max) { - return true; - } - // note: else we ignore it but should we index each part of it? - } - // reached EOS -- return false - return false; + public boolean accept() throws IOException { + final int len = termAtt.length(); + return (len >= min && len <= max); } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java index f7a295a93ff..9aca85ccbcc 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java @@ -109,7 +109,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase { private final Version matchVersion; /** - * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) + * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}) * and a few default entries for the stem exclusion table. * */ diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java new file mode 100644 index 00000000000..b0cd8d60cfc --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java @@ -0,0 +1,150 @@ +package org.apache.lucene.analysis.path; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * + * Take something like: + * + *
    + *  /something/something/else
    + * 
    + * + * and make: + * + *
    + *  /something
    + *  /something/something
    + *  /something/something/else
    + * 
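A minimal usage sketch of the behavior documented above (illustrative, not part of this patch; the sample path is arbitrary):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class PathHierarchyExample {
      public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new PathHierarchyTokenizer(new StringReader("/usr/local/lib"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        while (tokenizer.incrementToken()) {
          // Emits "/usr", then "/usr/local", then "/usr/local/lib".
          System.out.println(term.toString());
        }
        tokenizer.end();
        tokenizer.close();
      }
    }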
    + * + */ +public class PathHierarchyTokenizer extends Tokenizer { + + public PathHierarchyTokenizer(Reader input) { + this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER); + } + + public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) { + this(input, bufferSize, delimiter, delimiter); + } + + public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) { + this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement); + } + + public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) { + super(input); + termAtt.resizeBuffer(bufferSize); + this.delimiter = delimiter; + this.replacement = replacement; + endDelimiter = false; + resultToken = new StringBuilder(bufferSize); + } + + private static final int DEFAULT_BUFFER_SIZE = 1024; + public static final char DEFAULT_DELIMITER = '/'; + private final char delimiter; + private final char replacement; + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); + private int finalOffset = 0; + private boolean endDelimiter; + private StringBuilder resultToken; + + @Override + public final boolean incrementToken() throws IOException { + clearAttributes(); + termAtt.append( resultToken ); + if(resultToken.length() == 0){ + posAtt.setPositionIncrement(1); + } + else{ + posAtt.setPositionIncrement(0); + } + int length = 0; + boolean added = false; + if( endDelimiter ){ + termAtt.append(replacement); + length++; + endDelimiter = false; + added = true; + } + + while (true) { + int c = input.read(); + if( c < 0 ) { + length += resultToken.length(); + termAtt.setLength(length); + finalOffset = correctOffset(length); + offsetAtt.setOffset(correctOffset(0), finalOffset); + if( added ){ + resultToken.setLength(0); + resultToken.append(termAtt.buffer(), 0, length); + } + return added; + } + added = true; + if( c == delimiter ) { + if( length > 0 ){ + endDelimiter = true; + break; + } + else{ + termAtt.append(replacement); + length++; + } + } + else { + termAtt.append((char)c); + length++; + } + } + + length += resultToken.length(); + termAtt.setLength(length); + finalOffset = correctOffset(length); + offsetAtt.setOffset(correctOffset(0), finalOffset); + resultToken.setLength(0); + resultToken.append(termAtt.buffer(), 0, length); + return true; + } + + @Override + public final void end() { + // set final offset + offsetAtt.setOffset(finalOffset, finalOffset); + } + + @Override + public void reset(Reader input) throws IOException { + super.reset(input); + resultToken.setLength(0); + finalOffset = 0; + endDelimiter = false; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java index 0ccbb85b074..77f5c95475f 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java @@ -113,6 +113,7 @@ public class PatternReplaceCharFilter extends BaseCharFilter { } } + @Override public int read() throws IOException { while( prepareReplaceBlock() ){ return replaceBlockBuffer.charAt( replaceBlockBufferOffset++ ); @@ -120,6 +121,7 @@ public class PatternReplaceCharFilter extends BaseCharFilter { 
return -1; } + @Override public int read(char[] cbuf, int off, int len) throws IOException { char[] tmp = new char[len]; int l = input.read(tmp, 0, len); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java index 7ce19e37445..01342ce9a11 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java @@ -1,10 +1,5 @@ package org.apache.lucene.analysis.pt; -import java.util.Arrays; - -import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -31,89 +26,14 @@ import org.apache.lucene.util.Version; * which is just the plural reduction step of the RSLP * algorithm from A Stemming Algorithmm for the Portuguese Language, * Orengo et al. + * @see RSLPStemmerBase */ -public class PortugueseMinimalStemmer { +public class PortugueseMinimalStemmer extends RSLPStemmerBase { - private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31, - Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", - "depois","dois","leis"), - false); - - private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31, - Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos", - "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés", - "através", "convés", "ês", "país", "após", "ambas", "ambos", - "messias", "depois"), - false); + private static final Step pluralStep = + parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural"); public int stem(char s[], int len) { - if (len < 3 || s[len-1] != 's') - return len; - - if (s[len-2] == 'n') { - len--; - s[len-1] = 'm'; - return len; - } - - if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') { - len--; - s[len-2] = 'ã'; - s[len-1] = 'o'; - return len; - } - - if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e') - if (!(len == 4 && s[0] == 'm')) { - len--; - s[len-1] = 'o'; - return len; - } - - if (len >= 4 && s[len-2] == 'i') { - if (s[len-3] == 'a') - if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) { - len--; - s[len-1] = 'l'; - return len; - } - - if (len >= 5 && s[len-3] == 'é') { - len--; - s[len-2] = 'e'; - s[len-1] = 'l'; - return len; - } - - if (len >= 5 && s[len-3] == 'e') { - len--; - s[len-1] = 'l'; - return len; - } - - if (len >= 5 && s[len-3] == 'ó') { - len--; - s[len-2] = 'o'; - s[len-1] = 'l'; - return len; - } - - if (!excIS.contains(s, 0, len)) { - s[len-1] = 'l'; - return len; - } - } - - if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e') - return len - 2; - - if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e') - if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o')) - return len - 2; - - if (excS.contains(s, 0, len)) - return len; - else - return len-1; + return pluralStep.apply(s, len); } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java new file mode 100644 index 00000000000..c761abd14e9 --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java @@ -0,0 +1,60 @@ +package 
org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * A {@link TokenFilter} that applies {@link PortugueseStemmer} to stem + * Portuguese words. + *

    + * To prevent terms from being stemmed use an instance of + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *

    + */ +public final class PortugueseStemFilter extends TokenFilter { + private final PortugueseStemmer stemmer = new PortugueseStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public PortugueseStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + // this stemmer increases word length by 1: worst case '*ã' -> '*ão' + final int len = termAtt.length(); + final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len); + termAtt.setLength(newlen); + } + return true; + } else { + return false; + } + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java new file mode 100644 index 00000000000..de0e497db9c --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java @@ -0,0 +1,102 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Map; + +/** + * Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa) + * algorithm. This is sometimes also referred to as the Orengo stemmer. 
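A sketch of calling the stemmer directly (illustrative, not part of this patch; the sample word is arbitrary). As the stem filter above does via resizeBuffer(len + 1), the input buffer must be oversized by at least one character:

    import org.apache.lucene.analysis.pt.PortugueseStemmer;

    public class PortugueseStemmerExample {
      public static void main(String[] args) {
        PortugueseStemmer stemmer = new PortugueseStemmer();
        String word = "meninos";
        // The stemmer requires a buffer oversized to at least len + 1.
        char[] buffer = new char[word.length() + 1];
        word.getChars(0, word.length(), buffer, 0);
        int newLen = stemmer.stem(buffer, word.length());
        System.out.println(new String(buffer, 0, newLen)); // the stemmed form
      }
    }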
+ * + * @see RSLPStemmerBase + */ +public class PortugueseStemmer extends RSLPStemmerBase { + private static final Step plural, feminine, adverb, augmentative, noun, verb, vowel; + + static { + Map steps = parse(PortugueseStemmer.class, "portuguese.rslp"); + plural = steps.get("Plural"); + feminine = steps.get("Feminine"); + adverb = steps.get("Adverb"); + augmentative = steps.get("Augmentative"); + noun = steps.get("Noun"); + verb = steps.get("Verb"); + vowel = steps.get("Vowel"); + } + + /** + * @param s buffer, oversized to at least len+1 + * @param len initial valid length of buffer + * @return new valid length, stemmed + */ + public int stem(char s[], int len) { + assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1"; + + len = plural.apply(s, len); + len = adverb.apply(s, len); + len = feminine.apply(s, len); + len = augmentative.apply(s, len); + + int oldlen = len; + len = noun.apply(s, len); + + if (len == oldlen) { /* suffix not removed */ + oldlen = len; + + len = verb.apply(s, len); + + if (len == oldlen) { /* suffix not removed */ + len = vowel.apply(s, len); + } + } + + // rslp accent removal + for (int i = 0; i < len; i++) { + switch(s[i]) { + case 'à': + case 'á': + case 'â': + case 'ã': + case 'ä': + case 'å': s[i] = 'a'; break; + case 'ç': s[i] = 'c'; break; + case 'è': + case 'é': + case 'ê': + case 'ë': s[i] = 'e'; break; + case 'ì': + case 'í': + case 'î': + case 'ï': s[i] = 'i'; break; + case 'ñ': s[i] = 'n'; break; + case 'ò': + case 'ó': + case 'ô': + case 'õ': + case 'ö': s[i] = 'o'; break; + case 'ù': + case 'ú': + case 'û': + case 'ü': s[i] = 'u'; break; + case 'ý': + case 'ÿ': s[i] = 'y'; break; + } + } + return len; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java new file mode 100644 index 00000000000..2fb7a1f236e --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java @@ -0,0 +1,345 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; + +import static org.apache.lucene.analysis.util.StemmerUtil.*; + +/** + * Base class for stemmers that use a set of RSLP-like stemming steps. + *

    + * RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed + * originally for stemming the Portuguese language, described in the paper + * A Stemming Algorithm for the Portuguese Language, Orengo et al. + *

    + * Since this time a plural-only modification (RSLP-S) as well as a modification + * for the Galician language have been implemented. This class parses a configuration + * file that describes {@link Step}s, where each Step contains a set of {@link Rule}s. + *

    + * The general rule format is:
    + *   { "suffix", N, "replacement", { "exception1", "exception2", ...}}
    + * where:
    + *   • suffix is the suffix to be removed (such as "inho").
    + *   • N is the min stem size, where stem is defined as the candidate stem
    + *     after removing the suffix (but before appending the replacement!)
    + *   • replacement is an optional string to append after removing the suffix.
    + *     This can be the empty string.
    + *   • exceptions is an optional list of exceptions, patterns that should
    + *     not be stemmed. These patterns can be specified as whole word or suffix (ends-with)
    + *     patterns, depending upon the exceptions format flag in the step header.
    + *
    + * A step is an ordered list of rules, with a structure in this format:
    + *   { "name", N, B, { "cond1", "cond2", ... }
    + *     ... rules ... };
    + * where:
    + *   • name is a name for the step (such as "Plural").
    + *   • N is the min word size. Words that are less than this length bypass
    + *     the step completely, as an optimization. Note: N can be zero; in this case this
    + *     implementation will automatically calculate the appropriate value from the underlying
    + *     rules.
    + *   • B is a "boolean" flag specifying how exceptions in the rules are matched.
    + *     A value of 1 indicates whole-word pattern matching; a value of 0 indicates that
    + *     exceptions are actually suffixes and should be matched with ends-with.
    + *   • conds are an optional list of conditions to enter the step at all. If
    + *     the list is non-empty, then a word must end with one of these conditions or it will
    + *     bypass the step completely as an optimization.
    + *
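For concreteness, a step written in this format might look like the sketch below; the step name, sizes, suffixes, and exceptions are invented for illustration and are not taken from portuguese.rslp. Blank lines and lines beginning with '#' are skipped by the parser, and a step's rule list ends with a line terminated by ';':

    # illustrative step only -- not the contents of portuguese.rslp
    { "Plural", 3, 1, {"s"},
      { "ns", 1, "m"},
      { "s", 2, "", {"ás", "cais"}} };

Read with the grammar above: the step is named "Plural", only words of at least 3 characters enter it, exceptions are matched as whole words (flag 1), and the step is only attempted for words ending in "s". The first rule rewrites a trailing "ns" to "m"; the second strips a trailing "s" unless the word is one of the listed exceptions.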

    + * @see RSLP description + * @lucene.internal + */ +public abstract class RSLPStemmerBase { + + /** + * A basic rule, with no exceptions. + */ + protected static class Rule { + protected final char suffix[]; + protected final char replacement[]; + protected final int min; + + /** + * Create a rule. + * @param suffix suffix to remove + * @param min minimum stem length + * @param replacement replacement string + */ + public Rule(String suffix, int min, String replacement) { + this.suffix = suffix.toCharArray(); + this.replacement = replacement.toCharArray(); + this.min = min; + } + + /** + * @return true if the word matches this rule. + */ + public boolean matches(char s[], int len) { + return (len - suffix.length >= min && endsWith(s, len, suffix)); + } + + /** + * @return new valid length of the string after firing this rule. + */ + public int replace(char s[], int len) { + if (replacement.length > 0) { + System.arraycopy(replacement, 0, s, len - suffix.length, replacement.length); + } + return len - suffix.length + replacement.length; + } + } + + /** + * A rule with a set of whole-word exceptions. + */ + protected static class RuleWithSetExceptions extends Rule { + protected final CharArraySet exceptions; + + public RuleWithSetExceptions(String suffix, int min, String replacement, + String[] exceptions) { + super(suffix, min, replacement); + for (int i = 0; i < exceptions.length; i++) { + if (!exceptions[i].endsWith(suffix)) + System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'"); + } + this.exceptions = new CharArraySet(Version.LUCENE_31, + Arrays.asList(exceptions), false); + } + + @Override + public boolean matches(char s[], int len) { + return super.matches(s, len) && !exceptions.contains(s, 0, len); + } + } + + /** + * A rule with a set of exceptional suffixes. + */ + protected static class RuleWithSuffixExceptions extends Rule { + // TODO: use a more efficient datastructure: automaton? + protected final char[][] exceptions; + + public RuleWithSuffixExceptions(String suffix, int min, String replacement, + String[] exceptions) { + super(suffix, min, replacement); + for (int i = 0; i < exceptions.length; i++) { + if (!exceptions[i].endsWith(suffix)) + System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'"); + } + this.exceptions = new char[exceptions.length][]; + for (int i = 0; i < exceptions.length; i++) + this.exceptions[i] = exceptions[i].toCharArray(); + } + + @Override + public boolean matches(char s[], int len) { + if (!super.matches(s, len)) + return false; + + for (int i = 0; i < exceptions.length; i++) + if (endsWith(s, len, exceptions[i])) + return false; + + return true; + } + } + + /** + * A step containing a list of rules. + */ + protected static class Step { + protected final String name; + protected final Rule rules[]; + protected final int min; + protected final char[][] suffixes; + + /** + * Create a new step + * @param name Step's name. + * @param rules an ordered list of rules. + * @param min minimum word size. if this is 0 it is automatically calculated. + * @param suffixes optional list of conditional suffixes. may be null. 
+ */ + public Step(String name, Rule rules[], int min, String suffixes[]) { + this.name = name; + this.rules = rules; + if (min == 0) { + min = Integer.MAX_VALUE; + for (Rule r : rules) + min = Math.min(min, r.min + r.suffix.length); + } + this.min = min; + + if (suffixes == null || suffixes.length == 0) { + this.suffixes = null; + } else { + this.suffixes = new char[suffixes.length][]; + for (int i = 0; i < suffixes.length; i++) + this.suffixes[i] = suffixes[i].toCharArray(); + } + } + + /** + * @return new valid length of the string after applying the entire step. + */ + public int apply(char s[], int len) { + if (len < min) + return len; + + if (suffixes != null) { + boolean found = false; + + for (int i = 0; i < suffixes.length; i++) + if (endsWith(s, len, suffixes[i])) { + found = true; + break; + } + + if (!found) return len; + } + + for (int i = 0; i < rules.length; i++) { + if (rules[i].matches(s, len)) + return rules[i].replace(s, len); + } + + return len; + } + } + + /** + * Parse a resource file into an RSLP stemmer description. + * @return a Map containing the named Steps in this description. + */ + protected static Map parse(Class clazz, String resource) { + // TODO: this parser is ugly, but works. use a jflex grammar instead. + try { + InputStream is = clazz.getResourceAsStream(resource); + LineNumberReader r = new LineNumberReader(new InputStreamReader(is, "UTF-8")); + Map steps = new HashMap(); + String step; + while ((step = readLine(r)) != null) { + Step s = parseStep(r, step); + steps.put(s.name, s); + } + r.close(); + return steps; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static final Pattern headerPattern = + Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$"); + private static final Pattern stripPattern = + Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$"); + private static final Pattern repPattern = + Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$"); + private static final Pattern excPattern = + Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$"); + + private static Step parseStep(LineNumberReader r, String header) throws IOException { + Matcher matcher = headerPattern.matcher(header); + if (!matcher.find()) { + throw new RuntimeException("Illegal Step header specified at line " + r.getLineNumber()); + } + assert matcher.groupCount() == 4; + String name = matcher.group(1); + int min = Integer.parseInt(matcher.group(2)); + int type = Integer.parseInt(matcher.group(3)); + String suffixes[] = parseList(matcher.group(4)); + Rule rules[] = parseRules(r, type); + return new Step(name, rules, min, suffixes); + } + + private static Rule[] parseRules(LineNumberReader r, int type) throws IOException { + List rules = new ArrayList(); + String line; + while ((line = readLine(r)) != null) { + Matcher matcher = stripPattern.matcher(line); + if (matcher.matches()) { + rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), "")); + } else { + matcher = repPattern.matcher(line); + if (matcher.matches()) { + rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), matcher.group(3))); + } else { + matcher = excPattern.matcher(line); + if (matcher.matches()) { + if (type == 0) { + rules.add(new RuleWithSuffixExceptions(matcher.group(1), + Integer.parseInt(matcher.group(2)), + matcher.group(3), + parseList(matcher.group(4)))); + } else { + rules.add(new 
RuleWithSetExceptions(matcher.group(1), + Integer.parseInt(matcher.group(2)), + matcher.group(3), + parseList(matcher.group(4)))); + } + } else { + throw new RuntimeException("Illegal Step rule specified at line " + r.getLineNumber()); + } + } + } + if (line.endsWith(";")) + return rules.toArray(new Rule[rules.size()]); + } + return null; + } + + private static String[] parseList(String s) { + if (s.length() == 0) + return null; + String list[] = s.split(","); + for (int i = 0; i < list.length; i++) + list[i] = parseString(list[i].trim()); + return list; + } + + private static String parseString(String s) { + return s.substring(1, s.length()-1); + } + + private static String readLine(LineNumberReader r) throws IOException { + String line = null; + while ((line = r.readLine()) != null) { + line = line.trim(); + if (line.length() > 0 && line.charAt(0) != '#') + return line; + } + return line; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro index 054167e0cd1..0557740e6d0 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro @@ -15,8 +15,8 @@ */ // Generated from IANA Root Zone Database -// file version from Saturday, December 4, 2010 12:34:19 PM UTC -// generated on Sunday, December 5, 2010 12:24:12 AM UTC +// file version from Wednesday, January 5, 2011 12:34:09 PM UTC +// generated on Thursday, January 6, 2011 5:09:41 AM UTC // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros ASCIITLD = "." ( @@ -306,6 +306,7 @@ ASCIITLD = "." ( | [xX][nN]--[pP]1[aA][iI] | [xX][nN]--[pP][gG][bB][sS]0[dD][hH] | [xX][nN]--[wW][gG][bB][hH]1[cC] + | [xX][nN]--[wW][gG][bB][lL]6[aA] | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA] | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX] | [xX][nN]--[zZ][cC][kK][zZ][aA][hH] diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro new file mode 100644 index 00000000000..fecf9777f9a --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro @@ -0,0 +1,125 @@ +/* + * Copyright 2010 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC +// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros + + +ALetterSupp = ( + ([\ud80d][\uDC00-\uDC2E]) + | ([\ud80c][\uDC00-\uDFFF]) + | ([\ud809][\uDC00-\uDC62]) + | ([\ud808][\uDC00-\uDF6E]) + | ([\ud81a][\uDC00-\uDE38]) + | ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF]) + | ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]) + | ([\ud801][\uDC00-\uDC9D]) + | ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]) + | ([\ud803][\uDC00-\uDC48]) + | ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]) +) +FormatSupp = ( + ([\ud804][\uDCBD]) + | ([\ud834][\uDD73-\uDD7A]) + | ([\udb40][\uDC01\uDC20-\uDC7F]) +) +ExtendSupp = ( + ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA]) + | ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]) + | ([\ud800][\uDDFD]) + | ([\udb40][\uDD00-\uDDEF]) + | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F]) +) +NumericSupp = ( + ([\ud804][\uDC66-\uDC6F]) + | ([\ud835][\uDFCE-\uDFFF]) + | ([\ud801][\uDCA0-\uDCA9]) +) +KatakanaSupp = ( + ([\ud82c][\uDC00]) +) +MidLetterSupp = ( + [] +) +MidNumSupp = ( + [] +) +MidNumLetSupp = ( + [] +) +ExtendNumLetSupp = ( + [] +) +ExtendNumLetSupp = ( + [] +) +ComplexContextSupp = ( + [] +) +HanSupp = ( + ([\ud87e][\uDC00-\uDE1D]) + | ([\ud86b][\uDC00-\uDFFF]) + | ([\ud86a][\uDC00-\uDFFF]) + | ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF]) + | ([\ud868][\uDC00-\uDFFF]) + | ([\ud86e][\uDC00-\uDC1D]) + | ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF]) + | ([\ud86c][\uDC00-\uDFFF]) + | ([\ud863][\uDC00-\uDFFF]) + | ([\ud862][\uDC00-\uDFFF]) + | ([\ud861][\uDC00-\uDFFF]) + | ([\ud860][\uDC00-\uDFFF]) + | ([\ud867][\uDC00-\uDFFF]) + | ([\ud866][\uDC00-\uDFFF]) + | ([\ud865][\uDC00-\uDFFF]) + | ([\ud864][\uDC00-\uDFFF]) + | ([\ud858][\uDC00-\uDFFF]) + | ([\ud859][\uDC00-\uDFFF]) + | ([\ud85a][\uDC00-\uDFFF]) + | ([\ud85b][\uDC00-\uDFFF]) + | ([\ud85c][\uDC00-\uDFFF]) + | ([\ud85d][\uDC00-\uDFFF]) + | ([\ud85e][\uDC00-\uDFFF]) + | ([\ud85f][\uDC00-\uDFFF]) + | ([\ud850][\uDC00-\uDFFF]) + | ([\ud851][\uDC00-\uDFFF]) + | ([\ud852][\uDC00-\uDFFF]) + | ([\ud853][\uDC00-\uDFFF]) + | ([\ud854][\uDC00-\uDFFF]) + | ([\ud855][\uDC00-\uDFFF]) + | ([\ud856][\uDC00-\uDFFF]) + | ([\ud857][\uDC00-\uDFFF]) + | ([\ud849][\uDC00-\uDFFF]) + | ([\ud848][\uDC00-\uDFFF]) + | ([\ud84b][\uDC00-\uDFFF]) + | ([\ud84a][\uDC00-\uDFFF]) + | ([\ud84d][\uDC00-\uDFFF]) + | ([\ud84c][\uDC00-\uDFFF]) + | ([\ud84f][\uDC00-\uDFFF]) + | ([\ud84e][\uDC00-\uDFFF]) + | ([\ud841][\uDC00-\uDFFF]) + | ([\ud840][\uDC00-\uDFFF]) + | ([\ud843][\uDC00-\uDFFF]) + | ([\ud842][\uDC00-\uDFFF]) + | ([\ud845][\uDC00-\uDFFF]) + | ([\ud844][\uDC00-\uDFFF]) + | ([\ud847][\uDC00-\uDFFF]) + | ([\ud846][\uDC00-\uDFFF]) +) +HiraganaSupp = ( + ([\ud83c][\uDE00]) + | ([\ud82c][\uDC01]) +) diff --git 
a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index 4a43225a46f..5b2b84a59fa 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -34,14 +34,6 @@ import org.apache.lucene.util.Version; * Unicode Text Segmentation algorithm, as specified in * Unicode Standard Annex #29. *

    - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. *

    Many applications have specific tokenizer needs. If this tokenizer does * not suit your application, please consider copying this source code * directory to your project and maintaining your own grammar-based tokenizer. diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java index b4c6e902763..83c17d0b61e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/6/11 12:09 AM */ package org.apache.lucene.analysis.standard; @@ -35,14 +35,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; *

  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ public final class StandardTokenizerImpl implements StandardTokenizerInterface { @@ -70,115 +62,138 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\47\0\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6"+ - "\5\0\32\1\4\0\1\10\1\0\32\1\57\0\1\1\2\0\1\2"+ - "\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0\37\1"+ - "\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+ - "\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+ - "\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+ - "\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1\1\0"+ - "\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+ - "\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+ - "\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6\1\0"+ - "\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1\2\2"+ - "\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1"+ - "\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1"+ - "\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1"+ - "\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0\4\2"+ - "\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\3"+ - "\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1"+ - "\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2"+ - "\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0"+ - "\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0"+ - "\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0"+ - "\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0"+ - "\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2"+ - "\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1"+ - "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2"+ - "\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0"+ - "\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+ - "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0"+ - "\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2"+ - "\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0"+ - "\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0"+ - "\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0"+ - "\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0"+ - "\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0"+ - "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1"+ - "\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0"+ - "\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1"+ - "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0"+ - "\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0\10\1"+ - "\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2\1\0"+ - "\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0"+ - "\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0"+ - "\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0"+ - "\10\2\22\0\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11"+ - "\10\12\1\0\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0"+ - "\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0"+ - "\1\11\1\0\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12"+ - "\1\0\2\12\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0"+ - "\12\3\2\0\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0"+ - "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+ - "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+ - "\71\0\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12"+ - "\1\11\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12"+ - "\12\3\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1"+ - "\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1"+ - 
"\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1"+ - "\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1"+ - "\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1"+ - "\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1"+ - "\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0"+ - "\3\1\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11"+ - "\1\12\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0"+ - "\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0"+ - "\14\2\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12"+ - "\7\11\2\12\6\0\12\3\1\11\3\0\2\11\40\0\27\1\5\2"+ - "\4\0\65\11\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3"+ - "\6\0\16\11\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0"+ - "\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0\46\1"+ - "\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3\44\1"+ - "\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0\300\1"+ - "\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1"+ - "\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1"+ - "\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1"+ - "\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1"+ - "\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0\5\2"+ - "\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0\6\2"+ - "\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0\1\1"+ - "\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0\1\1"+ - "\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0\4\1"+ - "\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0\57\1"+ - "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1\12\0"+ - "\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0\7\1"+ - "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+ - "\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\13\1\0\131\13"+ - "\14\0\326\13\57\0\1\1\1\0\1\13\31\0\11\13\6\2\1\0"+ - "\5\4\2\0\3\13\1\1\1\1\4\0\126\14\2\0\2\2\2\4"+ - "\3\14\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0\33\1"+ - "\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\13\112\0\u51cc\13"+ - "\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3\2\1"+ - "\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1\2\2"+ - "\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0\12\1"+ - "\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0"+ - "\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2\6\1"+ - "\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2\14\0"+ - "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0\51\1"+ - "\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0\33\11"+ - "\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12\5\11\2\12"+ - "\1\11\1\12\1\11\30\0\5\11\41\0\6\1\2\0\6\1\2\0"+ - "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+ - "\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u012e\13"+ - "\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1\5\0\1\1"+ - "\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1"+ - "\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1"+ - "\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0\7\2"+ - "\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6\1\5"+ - "\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0\1\6"+ - "\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0\1\10"+ - "\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0\6\1"+ - "\2\0\6\1\2\0\3\1\34\0\3\2\4\0"; + "\47\0\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137"+ + "\5\0\32\132\4\0\1\141\1\0\32\132\57\0\1\132\2\0\1\133"+ + "\7\0\1\132\1\0\1\136\2\0\1\132\5\0\27\132\1\0\37\132"+ + "\1\0\u01ca\132\4\0\14\132\16\0\5\132\7\0\1\132\1\0\1\132"+ + "\21\0\160\133\5\132\1\0\2\132\2\0\4\132\1\137\7\0\1\132"+ + "\1\136\3\132\1\0\1\132\1\0\24\132\1\0\123\132\1\0\213\132"+ + "\1\0\7\133\236\132\11\0\46\132\2\0\1\132\7\0\47\132\1\0"+ + "\1\137\7\0\55\133\1\0\1\133\1\0\2\133\1\0\2\133\1\0"+ + "\1\133\10\0\33\132\5\0\4\132\1\136\13\0\4\133\10\0\2\137"+ + "\2\0\13\133\5\0\53\132\25\133\12\134\1\0\1\134\1\137\1\0"+ + "\2\132\1\133\143\132\1\0\1\132\7\133\1\133\1\0\6\133\2\132"+ + "\2\133\1\0\4\133\2\132\12\134\3\132\2\0\1\132\17\0\1\133"+ + 
"\1\132\1\133\36\132\33\133\2\0\131\132\13\133\1\132\16\0\12\134"+ + "\41\132\11\133\2\132\2\0\1\137\1\0\1\132\5\0\26\132\4\133"+ + "\1\132\11\133\1\132\3\133\1\132\5\133\22\0\31\132\3\133\244\0"+ + "\4\133\66\132\3\133\1\132\22\133\1\132\7\133\12\132\2\133\2\0"+ + "\12\134\1\0\7\132\1\0\7\132\1\0\3\133\1\0\10\132\2\0"+ + "\2\132\2\0\26\132\1\0\7\132\1\0\1\132\3\0\4\132\2\0"+ + "\1\133\1\132\7\133\2\0\2\133\2\0\3\133\1\132\10\0\1\133"+ + "\4\0\2\132\1\0\3\132\2\133\2\0\12\134\2\132\17\0\3\133"+ + "\1\0\6\132\4\0\2\132\2\0\26\132\1\0\7\132\1\0\2\132"+ + "\1\0\2\132\1\0\2\132\2\0\1\133\1\0\5\133\4\0\2\133"+ + "\2\0\3\133\3\0\1\133\7\0\4\132\1\0\1\132\7\0\12\134"+ + "\2\133\3\132\1\133\13\0\3\133\1\0\11\132\1\0\3\132\1\0"+ + "\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132"+ + "\10\133\1\0\3\133\1\0\3\133\2\0\1\132\17\0\2\132\2\133"+ + "\2\0\12\134\21\0\3\133\1\0\10\132\2\0\2\132\2\0\26\132"+ + "\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132\7\133"+ + "\2\0\2\133\2\0\3\133\10\0\2\133\4\0\2\132\1\0\3\132"+ + "\2\133\2\0\12\134\1\0\1\132\20\0\1\133\1\132\1\0\6\132"+ + "\3\0\3\132\1\0\4\132\3\0\2\132\1\0\1\132\1\0\2\132"+ + "\3\0\2\132\3\0\3\132\3\0\14\132\4\0\5\133\3\0\3\133"+ + "\1\0\4\133\2\0\1\132\6\0\1\133\16\0\12\134\21\0\3\133"+ + "\1\0\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132"+ + "\3\0\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\1\0"+ + "\2\132\6\0\2\132\2\133\2\0\12\134\22\0\2\133\1\0\10\132"+ + "\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132\2\0\1\133"+ + "\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\7\0\1\132"+ + "\1\0\2\132\2\133\2\0\12\134\1\0\2\132\17\0\2\133\1\0"+ + "\10\132\1\0\3\132\1\0\51\132\2\0\1\132\7\133\1\0\3\133"+ + "\1\0\4\133\1\132\10\0\1\133\10\0\2\132\2\133\2\0\12\134"+ + "\12\0\6\132\2\0\2\133\1\0\22\132\3\0\30\132\1\0\11\132"+ + "\1\0\1\132\2\0\7\132\3\0\1\133\4\0\6\133\1\0\1\133"+ + "\1\0\10\133\22\0\2\133\15\0\60\142\1\143\2\142\7\143\5\0"+ + "\7\142\10\143\1\0\12\134\47\0\2\142\1\0\1\142\2\0\2\142"+ + "\1\0\1\142\2\0\1\142\6\0\4\142\1\0\7\142\1\0\3\142"+ + "\1\0\1\142\1\0\1\142\2\0\2\142\1\0\4\142\1\143\2\142"+ + "\6\143\1\0\2\143\1\142\2\0\5\142\1\0\1\142\1\0\6\143"+ + "\2\0\12\134\2\0\2\142\42\0\1\132\27\0\2\133\6\0\12\134"+ + "\13\0\1\133\1\0\1\133\1\0\1\133\4\0\2\133\10\132\1\0"+ + "\44\132\4\0\24\133\1\0\2\133\5\132\13\133\1\0\44\133\11\0"+ + "\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143\4\142"+ + "\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143\1\142"+ + "\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132\3\0"+ + "\u0149\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132\2\0"+ + "\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132\1\0"+ + "\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132\2\0"+ + "\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132\2\0"+ + "\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132\1\0"+ + "\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0\15\132"+ + "\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0\1\142"+ + "\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134\6\0"+ + "\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132\3\0"+ + "\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0\54\142"+ + "\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142\40\0"+ + "\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133\12\134"+ + "\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132\4\0"+ + "\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132\12\134"+ + "\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0\3\132"+ + "\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132\1\133"+ + 
"\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0\46\132"+ + "\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0\1\132"+ + "\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0\3\132"+ + "\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0\3\132"+ + "\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140\2\0"+ + "\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141\13\0"+ + "\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132\63\0"+ + "\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132\3\0"+ + "\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132\1\0"+ + "\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132\u032d\0"+ + "\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132\3\133"+ + "\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132\11\0"+ + "\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0"+ + "\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132\120\0"+ + "\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144\31\0"+ + "\11\144\6\133\1\0\5\135\2\0\3\144\1\132\1\132\4\0\126\145"+ + "\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0\51\132\3\0"+ + "\136\132\21\0\33\132\65\0\20\135\320\0\57\135\1\0\130\135\250\0"+ + "\u19b6\144\112\0\u51cc\144\64\0\u048d\132\103\0\56\132\2\0\u010d\132\3\0"+ + "\20\132\12\134\2\132\24\0\57\132\4\133\11\0\2\133\1\0\31\132"+ + "\10\0\120\132\2\133\45\0\11\132\2\0\147\132\2\0\4\132\1\0"+ + "\2\132\16\0\12\132\120\0\10\132\1\133\3\132\1\133\4\132\1\133"+ + "\27\132\5\133\30\0\64\132\14\0\2\133\62\132\21\133\13\0\12\134"+ + "\6\0\22\133\6\132\3\0\1\132\4\0\12\134\34\132\10\133\2\0"+ + "\27\132\15\133\14\0\35\132\3\0\4\133\57\132\16\133\16\0\1\132"+ + "\12\134\46\0\51\132\16\133\11\0\3\132\1\133\10\132\2\133\2\0"+ + "\12\134\6\0\33\142\1\143\4\0\60\142\1\143\1\142\3\143\2\142"+ + "\2\143\5\142\2\143\1\142\1\143\1\142\30\0\5\142\41\0\6\132"+ + "\2\0\6\132\2\0\6\132\11\0\7\132\1\0\7\132\221\0\43\132"+ + "\10\133\1\0\2\133\2\0\12\134\6\0\u2ba4\132\14\0\27\132\4\0"+ + "\61\132\4\0\1\31\1\25\1\46\1\43\1\13\3\0\1\7\1\5"+ + "\2\0\1\3\1\1\14\0\1\11\21\0\1\112\7\0\1\65\1\17"+ + "\6\0\1\130\3\0\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\121\1\120\1\120\1\120\1\125\1\123"+ + "\17\0\1\114\u02c1\0\1\70\277\0\1\113\1\71\1\2\3\124\2\35"+ + "\1\124\1\35\2\124\1\14\21\124\2\60\7\73\1\72\7\73\7\52"+ + "\1\15\1\52\1\75\2\45\1\44\1\75\1\45\1\44\10\75\2\63"+ + "\5\61\2\54\5\61\1\6\10\37\5\21\3\27\12\106\20\27\3\42"+ + "\32\30\1\26\2\24\2\110\1\111\2\110\2\111\2\110\1\111\3\24"+ + "\1\16\2\24\12\64\1\74\1\41\1\34\1\64\6\41\1\34\66\41"+ + "\5\115\6\103\1\51\4\103\2\51\10\103\1\51\7\100\1\12\2\100"+ + "\32\103\1\12\4\100\1\12\5\102\1\101\1\102\3\101\7\102\1\101"+ + "\23\102\5\67\3\102\6\67\2\67\6\66\10\66\2\100\7\66\36\100"+ + "\4\66\102\100\15\115\1\77\2\115\1\131\3\117\1\115\2\117\5\115"+ + "\4\117\4\116\1\115\3\116\1\115\5\116\26\56\4\23\1\105\2\104"+ + "\4\122\1\104\2\122\3\76\33\122\35\55\3\122\35\126\3\122\6\126"+ + "\2\33\31\126\1\33\17\126\6\122\4\22\1\10\37\22\1\10\4\22"+ + "\25\62\1\127\11\62\21\55\5\62\1\57\12\40\13\62\4\55\1\50"+ + "\6\55\12\122\17\55\1\47\3\53\15\20\11\36\1\32\24\36\2\20"+ + "\11\36\1\32\31\36\1\32\4\20\4\36\2\32\2\107\1\4\5\107"+ + "\52\4\u1900\0\u012e\144\2\0\76\144\2\0\152\144\46\0\7\132\14\0"+ + "\5\132\5\0\1\132\1\133\12\132\1\0\15\132\1\0\5\132\1\0"+ + "\1\132\1\0\2\132\1\0\2\132\1\0\154\132\41\0\u016b\132\22\0"+ + 
"\100\132\2\0\66\132\50\0\14\132\4\0\20\133\1\137\2\0\1\136"+ + "\1\137\13\0\7\133\14\0\2\141\30\0\3\141\1\137\1\0\1\140"+ + "\1\0\1\137\1\136\32\0\5\132\1\0\207\132\2\0\1\133\7\0"+ + "\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137\5\0"+ + "\32\132\4\0\1\141\1\0\32\132\13\0\70\135\2\133\37\132\3\0"+ + "\6\132\2\0\6\132\2\0\6\132\2\0\3\132\34\0\3\133\4\0"; /** * Translates characters to character classes @@ -191,11 +206,11 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\1\0\1\1\1\2\1\3\1\2\1\1\1\4\1\5"+ - "\1\6\1\2\1\0\1\2\1\0\1\3\2\0"; + "\1\0\23\1\1\2\1\3\1\2\1\1\1\4\1\5"+ + "\1\6\15\0\1\2\1\0\1\2\10\0\1\3\61\0"; private static int [] zzUnpackAction() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -220,11 +235,22 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\15\0\32\0\47\0\64\0\101\0\116\0\15"+ - "\0\15\0\133\0\150\0\165\0\202\0\217\0\101\0\234"; + "\0\0\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264\0\u02ca"+ + "\0\u0330\0\u0396\0\u03fc\0\u0462\0\u04c8\0\u052e\0\u0594\0\u05fa"+ + "\0\u0660\0\u06c6\0\u072c\0\u0792\0\u07f8\0\u085e\0\u08c4\0\u092a"+ + "\0\u0990\0\146\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264"+ + "\0\u09f6\0\u0a5c\0\u0ac2\0\u0b28\0\u0462\0\u0b8e\0\u0bf4\0\u0c5a"+ + "\0\u0cc0\0\u0d26\0\u0d8c\0\u0df2\0\u0330\0\u0396\0\u0e58\0\u0ebe"+ + "\0\u0f24\0\u0f8a\0\u0ff0\0\u1056\0\u10bc\0\u1122\0\u1188\0\u11ee"+ + "\0\u1254\0\u12ba\0\u1320\0\u1386\0\u13ec\0\u1452\0\u14b8\0\u092a"+ + "\0\u151e\0\u1584\0\u15ea\0\u1650\0\u16b6\0\u171c\0\u1782\0\u17e8"+ + "\0\u184e\0\u18b4\0\u191a\0\u1980\0\u19e6\0\u1a4c\0\u1ab2\0\u1b18"+ + "\0\u1b7e\0\u1be4\0\u1c4a\0\u1cb0\0\u1d16\0\u1d7c\0\u1de2\0\u1e48"+ + "\0\u1eae\0\u1f14\0\u1f7a\0\u1fe0\0\u2046\0\u20ac\0\u2112\0\u2178"+ + "\0\u21de\0\u2244\0\u22aa\0\u2310\0\u2376"; private static int [] zzUnpackRowMap() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -247,21 +273,280 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\2\1\3\1\2\1\4\1\5\3\2\1\6\2\7"+ - "\1\10\1\11\16\0\2\3\1\12\1\0\1\13\1\0"+ - "\1\13\1\14\1\0\1\3\3\0\1\3\2\4\2\0"+ - "\2\15\1\16\1\0\1\4\4\0\1\5\1\0\1\5"+ - "\3\0\1\14\1\0\1\5\3\0\1\3\1\17\1\4"+ - "\1\5\3\0\1\17\1\0\1\17\13\0\2\7\3\0"+ - "\1\3\2\12\2\0\2\20\1\14\1\0\1\12\3\0"+ - "\1\3\1\13\7\0\1\13\3\0\1\3\1\14\1\12"+ - "\1\5\3\0\1\14\1\0\1\14\4\0\1\15\1\4"+ - "\6\0\1\15\3\0\1\3\1\16\1\4\1\5\3\0"+ - "\1\16\1\0\1\16\4\0\1\20\1\12\6\0\1\20"+ - "\2\0"; + "\1\2\1\3\1\2\1\4\1\2\1\5\1\2\1\6"+ + "\1\2\1\7\1\2\1\10\3\2\1\11\5\2\1\12"+ + "\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+ + "\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+ + "\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+ + "\3\2\1\30\2\31\1\32\1\33\150\0\1\25\11\0"+ + "\1\25\20\0\1\25\22\0\1\25\10\0\3\25\17\0"+ + "\1\25\10\0\1\25\23\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\1\25\1\0\1\25\1\0\3\25\1\0"+ + "\5\25\1\0\3\25\1\0\11\25\1\0\2\25\1\0"+ + "\16\25\1\0\2\25\1\0\21\25\1\0\1\25\1\0"+ + "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + 
"\1\25\16\0\1\25\3\0\1\25\5\0\2\25\3\0"+ + "\1\25\13\0\1\25\1\0\1\25\4\0\2\25\4\0"+ + "\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+ + "\3\25\1\0\1\25\15\0\1\25\10\0\1\25\23\0"+ + "\1\25\3\0\1\25\1\0\1\25\1\0\1\25\1\0"+ + "\3\25\2\0\4\25\1\0\3\25\2\0\3\25\1\0"+ + "\4\25\1\0\2\25\2\0\3\25\1\0\11\25\1\0"+ + "\2\25\1\0\16\25\1\0\2\25\1\0\1\25\1\0"+ + "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + "\1\25\16\0\1\25\3\0\1\25\3\0\1\25\1\0"+ + "\3\25\2\0\1\25\1\0\2\25\1\0\3\25\3\0"+ + "\2\25\1\0\1\25\1\0\2\25\1\0\2\25\3\0"+ + "\2\25\1\0\1\25\1\0\1\25\1\0\2\25\1\0"+ + "\2\25\1\0\2\25\1\0\5\25\1\0\5\25\1\0"+ + "\2\25\1\0\2\25\1\0\1\25\1\0\3\25\4\0"+ + "\1\25\4\0\1\25\30\0\3\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\4\0\1\25\14\0\1\25\5\0"+ + "\1\25\11\0\2\25\12\0\1\26\1\0\2\25\12\0"+ + "\1\25\23\0\1\25\1\0\1\26\7\0\2\25\2\0"+ + "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+ + "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+ + "\4\25\1\0\5\25\1\26\1\0\1\25\1\0\1\25"+ + "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+ + "\2\0\1\25\16\0\1\25\3\0\1\25\5\0\2\25"+ + "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+ + "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+ + "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\26\1\0\1\25\10\0\1\25"+ + "\23\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+ + "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+ + "\1\0\3\25\3\0\2\25\4\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\25\20\0\1\25\11\0\2\25"+ + "\17\0\1\25\6\0\2\25\4\0\1\25\5\0\1\25"+ + "\2\0\1\25\5\0\3\25\1\0\1\25\15\0\1\25"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\7\0\1\25\2\0\1\25\5\0\1\25\2\0\1\25"+ + "\1\0\1\25\105\0\1\33\21\0\1\27\34\0\1\32"+ + "\3\0\1\32\3\0\1\32\1\0\3\32\2\0\1\32"+ + "\2\0\1\32\1\0\3\32\3\0\2\32\1\0\1\32"+ + "\1\0\2\32\1\0\2\32\3\0\2\32\1\0\1\32"+ + "\3\0\2\32\1\0\2\32\1\0\2\32\1\0\5\32"+ + "\1\0\5\32\2\0\1\32\1\0\2\32\1\0\1\32"+ + "\1\0\3\32\4\0\1\32\4\0\1\32\16\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\11\32"+ + "\1\0\2\32\1\0\16\32\1\0\2\32\1\0\21\32"+ + "\1\0\1\32\1\0\3\32\2\0\1\32\1\0\1\32"+ + "\1\0\2\32\1\0\1\32\16\0\1\32\1\0\1\32"+ + "\1\0\1\32\3\0\1\32\1\0\3\32\1\0\2\32"+ + "\1\0\2\32\1\0\3\32\1\0\11\32\1\0\2\32"+ + "\1\0\16\32\1\0\2\32\1\0\21\32\1\0\1\32"+ + "\1\0\3\32\2\0\1\32\1\0\1\32\1\0\2\32"+ + "\1\0\1\32\16\0\1\32\11\0\1\32\20\0\1\32"+ + "\33\0\1\32\21\0\1\32\10\0\1\32\23\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\6\32"+ + "\1\0\2\32\1\0\2\32\1\0\10\32\1\0\5\32"+ + "\1\0\2\32\1\0\21\32\1\0\1\32\1\0\3\32"+ + "\2\0\1\32\1\0\1\32\1\0\2\32\1\0\1\32"+ + "\145\0\1\33\15\0\1\34\1\0\1\35\1\0\1\36"+ + "\1\0\1\37\1\0\1\40\1\0\1\41\3\0\1\42"+ + "\5\0\1\43\3\0\1\44\11\0\1\45\2\0\1\46"+ + "\16\0\1\47\2\0\1\50\41\0\2\25\1\51\1\0"+ + "\1\52\1\0\1\52\1\53\1\0\1\25\3\0\1\34"+ + "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+ + "\1\0\1\54\3\0\1\55\5\0\1\56\3\0\1\57"+ + "\11\0\1\45\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\1\25\2\26\2\0\2\63\1\64\1\0\1\26"+ + "\15\0\1\65\15\0\1\66\14\0\1\67\16\0\1\70"+ + "\2\0\1\71\21\0\1\72\20\0\1\27\1\0\1\27"+ + "\3\0\1\53\1\0\1\27\3\0\1\34\1\0\1\35"+ + "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\73"+ + "\3\0\1\55\5\0\1\56\3\0\1\74\11\0\1\45"+ + "\2\0\1\75\16\0\1\76\2\0\1\77\21\0\1\72"+ + "\17\0\1\25\1\100\1\26\1\27\3\0\1\100\1\0"+ + "\1\100\144\0\2\31\4\0\1\25\11\0\3\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+ + "\1\25\1\0\2\25\4\0\1\25\5\0\1\25\3\0"+ + "\1\25\4\0\5\25\10\0\1\51\1\0\2\25\1\0"+ + "\1\25\10\0\1\25\23\0\1\25\1\0\1\51\7\0"+ + 
"\2\25\2\0\5\25\2\0\2\25\4\0\6\25\1\0"+ + "\2\25\4\0\5\25\1\0\5\25\1\0\2\25\1\0"+ + "\3\25\1\0\4\25\1\0\5\25\1\51\1\0\1\25"+ + "\1\0\1\25\1\0\3\25\2\0\1\25\1\0\1\25"+ + "\1\0\1\25\2\0\1\25\16\0\1\25\3\0\1\25"+ + "\5\0\2\25\3\0\1\25\4\0\3\25\4\0\1\25"+ + "\1\0\1\25\2\0\1\25\1\0\2\25\4\0\1\25"+ + "\1\0\1\25\3\0\2\25\1\0\1\25\5\0\3\25"+ + "\1\0\1\25\10\0\1\25\1\0\2\51\1\0\1\25"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+ + "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+ + "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+ + "\1\0\1\25\1\0\3\25\1\0\1\25\1\0\2\25"+ + "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+ + "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+ + "\5\0\3\25\1\0\1\25\5\0\3\25\5\0\1\25"+ + "\2\0\2\25\4\0\1\25\2\0\1\25\1\0\1\25"+ + "\102\0\2\25\6\0\1\25\55\0\1\25\3\0\1\25"+ + "\2\0\1\25\3\0\1\25\5\0\1\25\7\0\1\25"+ + "\4\0\2\25\3\0\2\25\1\0\1\25\4\0\1\25"+ + "\1\0\1\25\2\0\2\25\1\0\3\25\1\0\1\25"+ + "\2\0\4\25\2\0\1\25\40\0\1\34\1\0\1\35"+ + "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\101"+ + "\3\0\1\42\5\0\1\43\3\0\1\102\11\0\1\45"+ + "\2\0\1\103\16\0\1\104\2\0\1\105\41\0\1\25"+ + "\2\51\2\0\2\106\1\53\1\0\1\51\3\0\1\34"+ + "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+ + "\1\0\1\107\3\0\1\110\5\0\1\111\3\0\1\112"+ + "\11\0\1\45\2\0\1\113\16\0\1\114\2\0\1\115"+ + "\41\0\1\25\1\52\7\0\1\52\3\0\1\34\1\0"+ + "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+ + "\1\116\3\0\1\42\5\0\1\43\3\0\1\117\11\0"+ + "\1\45\2\0\1\120\16\0\1\121\2\0\1\122\21\0"+ + "\1\72\17\0\1\25\1\53\1\51\1\27\3\0\1\53"+ + "\1\0\1\53\4\0\1\26\11\0\3\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+ + "\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+ + "\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+ + "\1\0\1\26\10\0\1\25\23\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+ + "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+ + "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+ + "\2\0\1\25\1\0\1\25\102\0\2\26\6\0\1\26"+ + "\55\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+ + "\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+ + "\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+ + "\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+ + "\52\0\1\123\3\0\1\124\5\0\1\125\3\0\1\126"+ + "\14\0\1\127\16\0\1\130\2\0\1\131\42\0\1\63"+ + "\1\26\6\0\1\63\3\0\1\34\1\0\1\35\1\0"+ + "\1\36\1\0\1\37\1\0\1\40\1\0\1\132\3\0"+ + "\1\55\5\0\1\56\3\0\1\133\11\0\1\45\2\0"+ + "\1\134\16\0\1\135\2\0\1\136\21\0\1\72\17\0"+ + "\1\25\1\64\1\26\1\27\3\0\1\64\1\0\1\64"+ + "\4\0\1\27\37\0\1\27\1\0\2\27\16\0\1\27"+ + "\4\0\1\27\2\0\2\27\15\0\1\27\131\0\1\27"+ + "\152\0\2\27\11\0\1\27\114\0\2\27\6\0\1\27"+ + "\55\0\1\27\3\0\1\27\2\0\1\27\3\0\1\27"+ + "\5\0\1\27\7\0\1\27\4\0\2\27\3\0\2\27"+ + "\1\0\1\27\4\0\1\27\1\0\1\27\2\0\2\27"+ + "\1\0\3\27\1\0\1\27\2\0\4\27\2\0\1\27"+ + "\152\0\1\27\34\0\1\100\11\0\3\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\100"+ + "\1\0\2\100\4\0\1\25\5\0\1\25\3\0\1\100"+ + "\4\0\1\100\2\25\2\100\10\0\1\26\1\0\2\25"+ + "\1\0\1\100\10\0\1\25\23\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\100"+ + "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+ + "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\2\100\5\0\1\25\2\0\1\25\1\100\4\0\1\25"+ + "\2\0\1\25\1\0\1\25\102\0\2\100\6\0\1\100"+ + "\55\0\1\100\3\0\1\100\2\0\1\100\3\0\1\100"+ + "\5\0\1\100\7\0\1\100\4\0\2\100\3\0\2\100"+ + "\1\0\1\100\4\0\1\100\1\0\1\100\2\0\2\100"+ + 
"\1\0\3\100\1\0\1\100\2\0\4\100\2\0\1\100"+ + "\41\0\1\51\11\0\3\25\5\0\1\25\1\0\1\25"+ + "\1\0\1\25\4\0\1\25\4\0\1\51\1\0\2\51"+ + "\4\0\1\25\5\0\1\25\3\0\1\51\4\0\1\51"+ + "\2\25\2\51\10\0\1\51\1\0\2\25\1\0\1\51"+ + "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+ + "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+ + "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+ + "\1\0\1\25\1\0\3\25\1\0\1\51\1\0\2\25"+ + "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+ + "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+ + "\5\0\3\25\1\0\1\25\5\0\1\25\2\51\5\0"+ + "\1\25\2\0\1\25\1\51\4\0\1\25\2\0\1\25"+ + "\1\0\1\25\102\0\2\51\6\0\1\51\55\0\1\51"+ + "\3\0\1\51\2\0\1\51\3\0\1\51\5\0\1\51"+ + "\7\0\1\51\4\0\2\51\3\0\2\51\1\0\1\51"+ + "\4\0\1\51\1\0\1\51\2\0\2\51\1\0\3\51"+ + "\1\0\1\51\2\0\4\51\2\0\1\51\52\0\1\137"+ + "\3\0\1\140\5\0\1\141\3\0\1\142\14\0\1\143"+ + "\16\0\1\144\2\0\1\145\42\0\1\106\1\51\6\0"+ + "\1\106\4\0\1\52\11\0\3\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\4\0\1\25\4\0\1\52\1\0"+ + "\2\52\4\0\1\25\5\0\1\25\3\0\1\52\4\0"+ + "\1\52\2\25\2\52\12\0\2\25\1\0\1\52\10\0"+ + "\1\25\23\0\1\25\11\0\2\25\2\0\5\25\2\0"+ + "\2\25\4\0\6\25\1\0\2\25\4\0\5\25\1\0"+ + "\5\25\1\0\2\25\1\0\3\25\1\0\4\25\1\0"+ + "\5\25\2\0\1\25\1\0\1\25\1\0\3\25\2\0"+ + "\1\25\1\0\1\25\1\0\1\25\2\0\1\25\16\0"+ + "\1\25\3\0\1\25\5\0\2\25\3\0\1\25\4\0"+ + "\3\25\4\0\1\25\1\0\1\25\2\0\1\25\1\0"+ + "\2\25\4\0\1\25\1\0\1\25\3\0\2\25\1\0"+ + "\1\25\5\0\3\25\1\0\1\25\10\0\1\25\4\0"+ + "\1\25\10\0\1\25\23\0\1\25\3\0\1\25\6\0"+ + "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+ + "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+ + "\1\25\1\0\1\25\1\0\3\25\1\0\1\52\1\0"+ + "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+ + "\2\25\20\0\1\25\3\0\1\25\5\0\1\25\32\0"+ + "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\52"+ + "\5\0\1\25\2\0\1\25\1\52\4\0\1\25\2\0"+ + "\1\25\1\0\1\25\102\0\2\52\6\0\1\52\55\0"+ + "\1\52\3\0\1\52\2\0\1\52\3\0\1\52\5\0"+ + "\1\52\7\0\1\52\4\0\2\52\3\0\2\52\1\0"+ + "\1\52\4\0\1\52\1\0\1\52\2\0\2\52\1\0"+ + "\3\52\1\0\1\52\2\0\4\52\2\0\1\52\41\0"+ + "\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+ + "\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+ + "\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+ + "\2\53\10\0\1\51\1\0\2\25\1\0\1\53\10\0"+ + "\1\25\23\0\1\25\3\0\1\25\6\0\2\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+ + "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+ + "\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+ + "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\20\0"+ + "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+ + "\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+ + "\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+ + "\1\25\102\0\2\53\6\0\1\53\55\0\1\53\3\0"+ + "\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+ + "\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+ + "\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+ + "\1\53\2\0\4\53\2\0\1\53\41\0\1\63\37\0"+ + "\1\63\1\0\2\63\16\0\1\63\4\0\1\63\2\0"+ + "\2\63\10\0\1\26\4\0\1\63\36\0\1\26\102\0"+ + "\1\26\146\0\2\26\133\0\1\63\152\0\2\63\11\0"+ + "\1\63\114\0\2\63\6\0\1\63\55\0\1\63\3\0"+ + "\1\63\2\0\1\63\3\0\1\63\5\0\1\63\7\0"+ + "\1\63\4\0\2\63\3\0\2\63\1\0\1\63\4\0"+ + "\1\63\1\0\1\63\2\0\2\63\1\0\3\63\1\0"+ + "\1\63\2\0\4\63\2\0\1\63\41\0\1\64\11\0"+ + "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+ + "\1\25\4\0\1\64\1\0\2\64\4\0\1\25\5\0"+ + "\1\25\3\0\1\64\4\0\1\64\2\25\2\64\10\0"+ + "\1\26\1\0\2\25\1\0\1\64\10\0\1\25\23\0"+ + "\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+ + "\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+ + "\3\25\1\0\1\64\1\0\2\25\4\0\3\25\1\0"+ + "\1\25\10\0\1\25\1\0\2\25\20\0\1\25\3\0"+ + "\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+ + "\1\25\5\0\1\25\2\64\5\0\1\25\2\0\1\25"+ + 
"\1\64\4\0\1\25\2\0\1\25\1\0\1\25\102\0"+ + "\2\64\6\0\1\64\55\0\1\64\3\0\1\64\2\0"+ + "\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+ + "\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+ + "\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+ + "\4\64\2\0\1\64\41\0\1\106\37\0\1\106\1\0"+ + "\2\106\16\0\1\106\4\0\1\106\2\0\2\106\10\0"+ + "\1\51\4\0\1\106\36\0\1\51\102\0\1\51\146\0"+ + "\2\51\133\0\1\106\152\0\2\106\11\0\1\106\114\0"+ + "\2\106\6\0\1\106\55\0\1\106\3\0\1\106\2\0"+ + "\1\106\3\0\1\106\5\0\1\106\7\0\1\106\4\0"+ + "\2\106\3\0\2\106\1\0\1\106\4\0\1\106\1\0"+ + "\1\106\2\0\2\106\1\0\3\106\1\0\1\106\2\0"+ + "\4\106\2\0\1\106\37\0"; private static int [] zzUnpackTrans() { - int [] result = new int[169]; + int [] result = new int[9180]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -299,11 +584,11 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\5\1\2\11\1\1\1\0\1\1\1\0"+ - "\1\1\2\0"; + "\1\0\1\11\27\1\2\11\15\0\1\1\1\0\1\1"+ + "\10\0\1\1\61\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[16]; + int [] result = new int[101]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -434,7 +719,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 2174) { + while (i < 2640) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -713,28 +998,28 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } - case 7: break; - case 6: - { return HIRAGANA_TYPE; - } - case 8: break; case 2: { return WORD_TYPE; } - case 9: break; + case 7: break; + case 4: + { return SOUTH_EAST_ASIAN_TYPE; + } + case 8: break; case 5: { return IDEOGRAPHIC_TYPE; } + case 9: break; + case 1: + { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } case 10: break; case 3: { return NUMERIC_TYPE; } case 11: break; - case 4: - { return SOUTH_EAST_ASIAN_TYPE; + case 6: + { return HIRAGANA_TYPE; } case 12: break; default: diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex index 7cb8ee498f4..8c805923466 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex @@ -33,14 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; *
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ %% @@ -53,15 +45,29 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %function getNextToken %char +%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro +ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) +Format = ([\p{WB:Format}] | {FormatSupp}) +Numeric = ([\p{WB:Numeric}] | {NumericSupp}) +Extend = ([\p{WB:Extend}] | {ExtendSupp}) +Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) +MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) +MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) +MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) +ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) +ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) +Han = ([\p{Script:Han}] | {HanSupp}) +Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) + // UAX#29 WB4. X (Extend | Format)* --> X // -ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]* +ALetterEx = {ALetter} ({Format} | {Extend})* // TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it -NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]* -KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]* -MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]* +NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* +KatakanaEx = {Katakana} ({Format} | {Extend})* +MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* +MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* +ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* %{ @@ -156,12 +162,12 @@ ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}] // // http://www.unicode.org/reports/tr14/#SA // -\p{LB:Complex_Context}+ { return SOUTH_EAST_ASIAN_TYPE; } +{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; } // UAX#29 WB14. Any ÷ Any // -\p{Script:Han} { return IDEOGRAPHIC_TYPE; } -\p{Script:Hiragana} { return HIRAGANA_TYPE; } +{Han} { return IDEOGRAPHIC_TYPE; } +{Hiragana} { return HIRAGANA_TYPE; } // UAX#29 WB3. CR × LF diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index 4ba30beba26..f440611eeb6 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/6/11 12:09 AM */ package org.apache.lucene.analysis.standard; @@ -47,14 +47,6 @@ import org.apache.lucene.util.AttributeSource; *
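The new SUPPLEMENTARY.jflex-macro included above expresses each supplementary (above-BMP) character class as pairs of surrogate ranges because the generated scanner still operates on 16-bit UTF-16 code units: a supplementary code point reaches the scanner as a high surrogate followed by a low surrogate, and it is that two-char sequence the macros match. A small sketch of the encoding, with an illustrative demo class and sample code point:

    public class SurrogatePairDemo {
      public static void main(String[] args) {
        // U+10400 (DESERET CAPITAL LETTER LONG I) lies outside the BMP, so in UTF-16 it
        // becomes the surrogate pair D801 DC00 -- the kind of two-char sequence matched by
        // the ALetterSupp alternative ([\ud801][\uDC00-\uDC9D]) in SUPPLEMENTARY.jflex-macro.
        char[] pair = Character.toChars(0x10400);
        System.out.printf("%04X %04X%n", (int) pair[0], (int) pair[1]); // prints: D801 DC00
      }
    }

With ALetter redefined as ([\p{WB:ALetter}] | {ALetterSupp}), the grammar covers both BMP letters (via the word-break property) and supplementary letters (via the surrogate-pair ranges), which is why the old warning about JFlex and supplementary characters is removed from the javadoc in this patch.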
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ public final class UAX29URLEmailTokenizer extends Tokenizer { @@ -82,123 +74,147 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\1\101\10\77\2\101\2\77\1\101\23\77\1\102\1\76\1\71\1\102"+ - "\1\62\1\60\1\65\2\63\2\102\1\64\1\44\1\11\1\70\1\45"+ - "\1\50\1\55\1\51\1\54\1\46\1\47\1\56\1\53\1\52\1\57"+ - "\1\74\1\77\1\75\1\77\1\67\1\66\1\12\1\36\1\13\1\14"+ - "\1\15\1\20\1\21\1\37\1\22\1\40\1\43\1\23\1\24\1\25"+ - "\1\17\1\27\1\26\1\16\1\30\1\31\1\32\1\41\1\33\1\34"+ - "\1\42\1\35\1\72\1\100\1\73\1\103\1\61\1\103\1\12\1\36"+ - "\1\13\1\14\1\15\1\20\1\21\1\37\1\22\1\40\1\43\1\23"+ - "\1\24\1\25\1\17\1\27\1\26\1\16\1\30\1\31\1\32\1\41"+ - "\1\33\1\34\1\42\1\35\3\103\1\60\1\104\52\0\1\1\2\0"+ - "\1\2\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0"+ - "\37\1\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0"+ - "\1\1\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0"+ - "\1\1\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0"+ - "\213\1\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1"+ - "\1\0\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2"+ - "\1\0\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0"+ - "\2\6\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6"+ - "\1\0\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1"+ - "\2\2\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2"+ - "\1\1\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3"+ - "\41\1\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2"+ - "\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0"+ - "\4\2\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0"+ - "\12\3\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0"+ - "\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0"+ - "\1\2\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2"+ - "\4\0\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2"+ - "\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1"+ - "\1\0\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2"+ - "\2\0\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3"+ - "\2\2\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0"+ - "\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1"+ - "\10\2\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2"+ - "\2\0\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1"+ - "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2"+ - "\2\0\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1"+ - "\2\2\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1"+ - "\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1"+ - "\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2"+ - "\1\0\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2"+ - "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+ - "\3\0\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0"+ - "\2\1\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1"+ - "\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2"+ - "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1"+ - "\1\0\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0"+ - "\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2"+ - "\1\0\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3"+ - "\12\0\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1"+ - "\1\0\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2"+ - "\1\0\10\2\22\0\2\2\15\0\60\105\1\106\2\105\7\106\5\0"+ - "\7\105\10\106\1\0\12\3\47\0\2\105\1\0\1\105\2\0\2\105"+ - "\1\0\1\105\2\0\1\105\6\0\4\105\1\0\7\105\1\0\3\105"+ - 
"\1\0\1\105\1\0\1\105\2\0\2\105\1\0\4\105\1\106\2\105"+ - "\6\106\1\0\2\106\1\105\2\0\5\105\1\0\1\105\1\0\6\106"+ - "\2\0\12\3\2\0\2\105\42\0\1\1\27\0\2\2\6\0\12\3"+ - "\13\0\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0"+ - "\44\1\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0"+ - "\1\2\71\0\53\105\24\106\1\105\12\3\6\0\6\105\4\106\4\105"+ - "\3\106\1\105\3\106\2\105\7\106\3\105\4\106\15\105\14\106\1\105"+ - "\1\106\12\3\4\106\2\105\46\1\12\0\53\1\1\0\1\1\3\0"+ - "\u0149\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+ - "\51\1\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0"+ - "\1\1\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0"+ - "\103\1\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0"+ - "\21\1\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+ - "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+ - "\1\0\3\1\1\0\2\2\14\0\64\105\40\106\3\0\1\105\4\0"+ - "\1\105\1\106\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1"+ - "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+ - "\4\0\14\2\12\0\12\3\36\105\2\0\5\105\13\0\54\105\4\0"+ - "\21\106\7\105\2\106\6\0\12\3\1\105\3\0\2\105\40\0\27\1"+ - "\5\2\4\0\65\105\12\106\1\0\35\106\2\0\1\2\12\3\6\0"+ - "\12\3\6\0\16\105\122\0\5\2\57\1\21\2\7\1\4\0\12\3"+ - "\21\0\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0"+ - "\46\1\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3"+ - "\44\1\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0"+ - "\300\1\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0"+ - "\6\1\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0"+ - "\37\1\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0"+ - "\7\1\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0"+ - "\7\1\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0"+ - "\5\2\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0"+ - "\6\2\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0"+ - "\1\1\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0"+ - "\1\1\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0"+ - "\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0"+ - "\57\1\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1"+ - "\12\0\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0"+ - "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+ - "\7\1\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\107\1\0"+ - "\131\107\14\0\326\107\57\0\1\1\1\0\1\107\31\0\11\107\6\2"+ - "\1\0\5\4\2\0\3\107\1\1\1\1\4\0\126\110\2\0\2\2"+ - "\2\4\3\110\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0"+ - "\33\1\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\107\112\0"+ - "\u51cc\107\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3"+ - "\2\1\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1"+ - "\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0"+ - "\12\1\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2"+ - "\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2"+ - "\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2"+ - "\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0"+ - "\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0"+ - "\33\105\1\106\4\0\60\105\1\106\1\105\3\106\2\105\2\106\5\105"+ - "\2\106\1\105\1\106\1\105\30\0\5\105\41\0\6\1\2\0\6\1"+ - "\2\0\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0"+ - "\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0"+ - "\u012e\107\2\0\76\107\2\0\152\107\46\0\7\1\14\0\5\1\5\0"+ - "\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0"+ - "\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0"+ - "\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0"+ - "\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6"+ - "\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0"+ - "\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0"+ - "\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0"+ - "\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0"; + "\1\236\10\234\2\236\2\234\1\236\23\234\1\237\1\233\1\226\1\237"+ + "\1\217\1\215\1\222\2\220\2\237\1\221\1\201\1\146\1\225\1\202"+ + 
"\1\205\1\212\1\206\1\211\1\203\1\204\1\213\1\210\1\207\1\214"+ + "\1\231\1\234\1\232\1\234\1\224\1\223\1\147\1\173\1\150\1\151"+ + "\1\152\1\155\1\156\1\174\1\157\1\175\1\200\1\160\1\161\1\162"+ + "\1\154\1\164\1\163\1\153\1\165\1\166\1\167\1\176\1\170\1\171"+ + "\1\177\1\172\1\227\1\235\1\230\1\240\1\216\1\240\1\147\1\173"+ + "\1\150\1\151\1\152\1\155\1\156\1\174\1\157\1\175\1\200\1\160"+ + "\1\161\1\162\1\154\1\164\1\163\1\153\1\165\1\166\1\167\1\176"+ + "\1\170\1\171\1\177\1\172\3\240\1\215\1\241\52\0\1\132\2\0"+ + "\1\133\7\0\1\132\1\0\1\136\2\0\1\132\5\0\27\132\1\0"+ + "\37\132\1\0\u01ca\132\4\0\14\132\16\0\5\132\7\0\1\132\1\0"+ + "\1\132\21\0\160\133\5\132\1\0\2\132\2\0\4\132\1\137\7\0"+ + "\1\132\1\136\3\132\1\0\1\132\1\0\24\132\1\0\123\132\1\0"+ + "\213\132\1\0\7\133\236\132\11\0\46\132\2\0\1\132\7\0\47\132"+ + "\1\0\1\137\7\0\55\133\1\0\1\133\1\0\2\133\1\0\2\133"+ + "\1\0\1\133\10\0\33\132\5\0\4\132\1\136\13\0\4\133\10\0"+ + "\2\137\2\0\13\133\5\0\53\132\25\133\12\134\1\0\1\134\1\137"+ + "\1\0\2\132\1\133\143\132\1\0\1\132\7\133\1\133\1\0\6\133"+ + "\2\132\2\133\1\0\4\133\2\132\12\134\3\132\2\0\1\132\17\0"+ + "\1\133\1\132\1\133\36\132\33\133\2\0\131\132\13\133\1\132\16\0"+ + "\12\134\41\132\11\133\2\132\2\0\1\137\1\0\1\132\5\0\26\132"+ + "\4\133\1\132\11\133\1\132\3\133\1\132\5\133\22\0\31\132\3\133"+ + "\244\0\4\133\66\132\3\133\1\132\22\133\1\132\7\133\12\132\2\133"+ + "\2\0\12\134\1\0\7\132\1\0\7\132\1\0\3\133\1\0\10\132"+ + "\2\0\2\132\2\0\26\132\1\0\7\132\1\0\1\132\3\0\4\132"+ + "\2\0\1\133\1\132\7\133\2\0\2\133\2\0\3\133\1\132\10\0"+ + "\1\133\4\0\2\132\1\0\3\132\2\133\2\0\12\134\2\132\17\0"+ + "\3\133\1\0\6\132\4\0\2\132\2\0\26\132\1\0\7\132\1\0"+ + "\2\132\1\0\2\132\1\0\2\132\2\0\1\133\1\0\5\133\4\0"+ + "\2\133\2\0\3\133\3\0\1\133\7\0\4\132\1\0\1\132\7\0"+ + "\12\134\2\133\3\132\1\133\13\0\3\133\1\0\11\132\1\0\3\132"+ + "\1\0\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133"+ + "\1\132\10\133\1\0\3\133\1\0\3\133\2\0\1\132\17\0\2\132"+ + "\2\133\2\0\12\134\21\0\3\133\1\0\10\132\2\0\2\132\2\0"+ + "\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132"+ + "\7\133\2\0\2\133\2\0\3\133\10\0\2\133\4\0\2\132\1\0"+ + "\3\132\2\133\2\0\12\134\1\0\1\132\20\0\1\133\1\132\1\0"+ + "\6\132\3\0\3\132\1\0\4\132\3\0\2\132\1\0\1\132\1\0"+ + "\2\132\3\0\2\132\3\0\3\132\3\0\14\132\4\0\5\133\3\0"+ + "\3\133\1\0\4\133\2\0\1\132\6\0\1\133\16\0\12\134\21\0"+ + "\3\133\1\0\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0"+ + "\5\132\3\0\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133"+ + "\1\0\2\132\6\0\2\132\2\133\2\0\12\134\22\0\2\133\1\0"+ + "\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132\2\0"+ + "\1\133\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\7\0"+ + "\1\132\1\0\2\132\2\133\2\0\12\134\1\0\2\132\17\0\2\133"+ + "\1\0\10\132\1\0\3\132\1\0\51\132\2\0\1\132\7\133\1\0"+ + "\3\133\1\0\4\133\1\132\10\0\1\133\10\0\2\132\2\133\2\0"+ + "\12\134\12\0\6\132\2\0\2\133\1\0\22\132\3\0\30\132\1\0"+ + "\11\132\1\0\1\132\2\0\7\132\3\0\1\133\4\0\6\133\1\0"+ + "\1\133\1\0\10\133\22\0\2\133\15\0\60\142\1\143\2\142\7\143"+ + "\5\0\7\142\10\143\1\0\12\134\47\0\2\142\1\0\1\142\2\0"+ + "\2\142\1\0\1\142\2\0\1\142\6\0\4\142\1\0\7\142\1\0"+ + "\3\142\1\0\1\142\1\0\1\142\2\0\2\142\1\0\4\142\1\143"+ + "\2\142\6\143\1\0\2\143\1\142\2\0\5\142\1\0\1\142\1\0"+ + "\6\143\2\0\12\134\2\0\2\142\42\0\1\132\27\0\2\133\6\0"+ + "\12\134\13\0\1\133\1\0\1\133\1\0\1\133\4\0\2\133\10\132"+ + "\1\0\44\132\4\0\24\133\1\0\2\133\5\132\13\133\1\0\44\133"+ + "\11\0\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143"+ + 
"\4\142\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143"+ + "\1\142\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132"+ + "\3\0\u0149\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132"+ + "\2\0\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132"+ + "\1\0\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132"+ + "\2\0\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132"+ + "\2\0\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132"+ + "\1\0\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0"+ + "\15\132\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0"+ + "\1\142\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134"+ + "\6\0\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132"+ + "\3\0\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0"+ + "\54\142\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142"+ + "\40\0\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133"+ + "\12\134\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132"+ + "\4\0\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132"+ + "\12\134\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0"+ + "\3\132\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132"+ + "\1\133\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0"+ + "\46\132\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0"+ + "\1\132\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0"+ + "\3\132\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0"+ + "\3\132\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140"+ + "\2\0\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141"+ + "\13\0\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132"+ + "\63\0\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132"+ + "\3\0\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132"+ + "\1\0\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132"+ + "\u032d\0\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132"+ + "\3\133\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132"+ + "\11\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132"+ + "\1\0\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132"+ + "\120\0\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144"+ + "\31\0\11\144\6\133\1\0\5\135\2\0\3\144\1\132\1\132\4\0"+ + "\126\145\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0\51\132"+ + "\3\0\136\132\21\0\33\132\65\0\20\135\320\0\57\135\1\0\130\135"+ + "\250\0\u19b6\144\112\0\u51cc\144\64\0\u048d\132\103\0\56\132\2\0\u010d\132"+ + "\3\0\20\132\12\134\2\132\24\0\57\132\4\133\11\0\2\133\1\0"+ + "\31\132\10\0\120\132\2\133\45\0\11\132\2\0\147\132\2\0\4\132"+ + "\1\0\2\132\16\0\12\132\120\0\10\132\1\133\3\132\1\133\4\132"+ + "\1\133\27\132\5\133\30\0\64\132\14\0\2\133\62\132\21\133\13\0"+ + "\12\134\6\0\22\133\6\132\3\0\1\132\4\0\12\134\34\132\10\133"+ + "\2\0\27\132\15\133\14\0\35\132\3\0\4\133\57\132\16\133\16\0"+ + "\1\132\12\134\46\0\51\132\16\133\11\0\3\132\1\133\10\132\2\133"+ + "\2\0\12\134\6\0\33\142\1\143\4\0\60\142\1\143\1\142\3\143"+ + "\2\142\2\143\5\142\2\143\1\142\1\143\1\142\30\0\5\142\41\0"+ + "\6\132\2\0\6\132\2\0\6\132\11\0\7\132\1\0\7\132\221\0"+ + "\43\132\10\133\1\0\2\133\2\0\12\134\6\0\u2ba4\132\14\0\27\132"+ + "\4\0\61\132\4\0\1\31\1\25\1\46\1\43\1\13\3\0\1\7"+ + "\1\5\2\0\1\3\1\1\14\0\1\11\21\0\1\112\7\0\1\65"+ + "\1\17\6\0\1\130\3\0\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+ + "\1\120\1\120\1\120\1\120\1\120\1\121\1\120\1\120\1\120\1\125"+ + "\1\123\17\0\1\114\u02c1\0\1\70\277\0\1\113\1\71\1\2\3\124"+ + 
"\2\35\1\124\1\35\2\124\1\14\21\124\2\60\7\73\1\72\7\73"+ + "\7\52\1\15\1\52\1\75\2\45\1\44\1\75\1\45\1\44\10\75"+ + "\2\63\5\61\2\54\5\61\1\6\10\37\5\21\3\27\12\106\20\27"+ + "\3\42\32\30\1\26\2\24\2\110\1\111\2\110\2\111\2\110\1\111"+ + "\3\24\1\16\2\24\12\64\1\74\1\41\1\34\1\64\6\41\1\34"+ + "\66\41\5\115\6\103\1\51\4\103\2\51\10\103\1\51\7\100\1\12"+ + "\2\100\32\103\1\12\4\100\1\12\5\102\1\101\1\102\3\101\7\102"+ + "\1\101\23\102\5\67\3\102\6\67\2\67\6\66\10\66\2\100\7\66"+ + "\36\100\4\66\102\100\15\115\1\77\2\115\1\131\3\117\1\115\2\117"+ + "\5\115\4\117\4\116\1\115\3\116\1\115\5\116\26\56\4\23\1\105"+ + "\2\104\4\122\1\104\2\122\3\76\33\122\35\55\3\122\35\126\3\122"+ + "\6\126\2\33\31\126\1\33\17\126\6\122\4\22\1\10\37\22\1\10"+ + "\4\22\25\62\1\127\11\62\21\55\5\62\1\57\12\40\13\62\4\55"+ + "\1\50\6\55\12\122\17\55\1\47\3\53\15\20\11\36\1\32\24\36"+ + "\2\20\11\36\1\32\31\36\1\32\4\20\4\36\2\32\2\107\1\4"+ + "\5\107\52\4\u1900\0\u012e\144\2\0\76\144\2\0\152\144\46\0\7\132"+ + "\14\0\5\132\5\0\1\132\1\133\12\132\1\0\15\132\1\0\5\132"+ + "\1\0\1\132\1\0\2\132\1\0\2\132\1\0\154\132\41\0\u016b\132"+ + "\22\0\100\132\2\0\66\132\50\0\14\132\4\0\20\133\1\137\2\0"+ + "\1\136\1\137\13\0\7\133\14\0\2\141\30\0\3\141\1\137\1\0"+ + "\1\140\1\0\1\137\1\136\32\0\5\132\1\0\207\132\2\0\1\133"+ + "\7\0\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137"+ + "\5\0\32\132\4\0\1\141\1\0\32\132\13\0\70\135\2\133\37\132"+ + "\3\0\6\132\2\0\6\132\2\0\6\132\2\0\3\132\34\0\3\133"+ + "\4\0"; /** * Translates characters to character classes @@ -211,25 +227,26 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\1\0\1\1\1\2\1\3\1\2\2\1\3\2\3\3"+ - "\3\1\1\4\1\5\1\6\1\2\1\0\1\2\1\0"+ - "\1\3\6\0\2\2\1\0\3\2\1\0\1\3\1\0"+ - "\2\3\1\2\1\3\11\0\32\2\3\0\4\2\32\0"+ - "\4\3\10\0\1\7\1\0\6\10\3\2\2\10\1\2"+ - "\4\10\1\2\2\10\2\0\1\2\1\0\1\2\6\10"+ - "\3\0\2\10\1\0\4\10\1\0\2\10\1\0\2\3"+ - "\10\0\1\10\32\0\1\10\1\0\3\10\6\2\1\0"+ - "\1\2\2\0\1\2\1\0\1\10\10\0\3\3\15\0"+ - "\3\10\6\7\3\0\2\7\1\0\4\7\1\0\2\7"+ - "\2\10\1\0\2\10\1\0\2\10\1\0\1\10\2\2"+ - "\7\0\2\3\20\0\1\7\10\0\1\10\3\0\1\2"+ - "\32\0\3\10\23\0\1\10\27\0\1\10\4\0\1\10"+ - "\6\0\1\10\4\0\2\10\36\0\1\10\51\0\1\10"+ - "\42\0\1\10\50\0\1\10\122\0\1\10\116\0\1\10"+ - "\107\0\1\10\74\0\1\10\51\0\1\10\333\0"; + "\1\0\23\1\1\2\1\3\1\2\1\1\1\4\1\5"+ + "\1\6\1\1\3\2\3\3\3\1\15\0\1\2\1\0"+ + "\1\2\10\0\1\3\21\0\2\2\1\0\3\2\1\0"+ + "\1\3\1\0\2\3\1\2\1\3\46\0\32\2\3\0"+ + "\4\2\32\0\4\3\17\0\1\7\1\0\6\10\3\2"+ + "\2\10\1\2\4\10\1\2\2\10\2\0\1\2\1\0"+ + "\1\2\6\10\3\0\2\10\1\0\4\10\1\0\2\10"+ + "\1\0\2\3\10\0\1\10\32\0\1\10\1\0\3\10"+ + "\6\2\1\0\1\2\2\0\1\2\1\0\1\10\10\0"+ + "\3\3\15\0\3\10\6\7\3\0\2\7\1\0\4\7"+ + "\1\0\2\7\2\10\1\0\2\10\1\0\2\10\1\0"+ + "\1\10\2\2\7\0\2\3\20\0\1\7\10\0\1\10"+ + "\3\0\1\2\32\0\3\10\23\0\1\10\27\0\1\10"+ + "\4\0\1\10\6\0\1\10\4\0\2\10\36\0\1\10"+ + "\51\0\1\10\42\0\1\10\51\0\1\10\122\0\1\10"+ + "\117\0\1\10\107\0\1\10\74\0\1\10\51\0\1\10"+ + "\333\0"; private static int [] zzUnpackAction() { - int [] result = new int[1117]; + int [] result = new int[1204]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -254,149 +271,160 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\111\0\222\0\333\0\u0124\0\u016d\0\u01b6\0\u01ff"+ - 
"\0\u0248\0\u0291\0\u02da\0\u0323\0\u036c\0\u03b5\0\u03fe\0\u0447"+ - "\0\u0490\0\111\0\111\0\u04d9\0\u0522\0\u056b\0\u05b4\0\u05fd"+ - "\0\u016d\0\u0646\0\u01b6\0\u068f\0\u06d8\0\u0721\0\u076a\0\u07b3"+ - "\0\u07fc\0\u0845\0\u088e\0\u08d7\0\u0920\0\u0969\0\u09b2\0\u09fb"+ - "\0\u0a44\0\u0a8d\0\u0ad6\0\u03b5\0\u0b1f\0\u0b68\0\u0447\0\u0bb1"+ - "\0\u0bfa\0\u0c43\0\u0c8c\0\u0cd5\0\u0d1e\0\u0d67\0\u0db0\0\u0df9"+ - "\0\u0e42\0\u0e8b\0\u0ed4\0\u0f1d\0\u0f66\0\u0faf\0\u0ff8\0\u1041"+ - "\0\u108a\0\u10d3\0\u111c\0\u1165\0\u11ae\0\u11f7\0\u1240\0\u1289"+ - "\0\u12d2\0\u131b\0\u1364\0\u13ad\0\u13f6\0\u143f\0\u1488\0\u14d1"+ - "\0\u151a\0\u1563\0\u15ac\0\u15f5\0\u163e\0\u1687\0\u16d0\0\u1719"+ - "\0\u1762\0\u17ab\0\u17f4\0\u183d\0\u1886\0\u18cf\0\u1918\0\u1961"+ - "\0\u19aa\0\u19f3\0\u1a3c\0\u1a85\0\u1ace\0\u1b17\0\u1b60\0\u1ba9"+ - "\0\u1bf2\0\u1c3b\0\u1c84\0\u1ccd\0\u1d16\0\u1d5f\0\u1da8\0\u1df1"+ - "\0\u1e3a\0\u1e83\0\u1ecc\0\u1f15\0\u1f5e\0\u1fa7\0\u1ff0\0\u2039"+ - "\0\u2082\0\u20cb\0\u2114\0\111\0\u215d\0\u21a6\0\u21ef\0\u2238"+ - "\0\u2281\0\u22ca\0\u2313\0\u235c\0\u23a5\0\u23ee\0\u2437\0\u2480"+ - "\0\u24c9\0\u2512\0\u255b\0\u25a4\0\u25ed\0\u2636\0\u267f\0\u26c8"+ - "\0\u2711\0\u275a\0\u27a3\0\u27ec\0\u2835\0\u287e\0\u28c7\0\u2910"+ - "\0\u2959\0\u29a2\0\u29eb\0\u2a34\0\u2a7d\0\u2ac6\0\u2b0f\0\u2b58"+ - "\0\u2ba1\0\u2bea\0\u2c33\0\u2c7c\0\u2cc5\0\u2d0e\0\u2d57\0\u2da0"+ - "\0\u2de9\0\u2e32\0\u2e7b\0\u2ec4\0\u2f0d\0\u2f56\0\u2f9f\0\u2fe8"+ - "\0\u3031\0\u307a\0\u30c3\0\u310c\0\u3155\0\u319e\0\u31e7\0\u3230"+ - "\0\u3279\0\u32c2\0\u330b\0\u3354\0\u339d\0\u33e6\0\u342f\0\u3478"+ - "\0\u34c1\0\u350a\0\u3553\0\u359c\0\u35e5\0\u362e\0\u3677\0\u36c0"+ - "\0\u3709\0\u3752\0\u379b\0\u37e4\0\u382d\0\u3876\0\u38bf\0\u3908"+ - "\0\u3951\0\u399a\0\u39e3\0\u3a2c\0\u3a75\0\u3abe\0\u3b07\0\u3b50"+ - "\0\u3b99\0\u3be2\0\u3c2b\0\u3c74\0\u3cbd\0\u3d06\0\u3d4f\0\u3d98"+ - "\0\u3de1\0\u3e2a\0\u3e73\0\u3ebc\0\u3f05\0\u3f4e\0\u3f97\0\u3fe0"+ - "\0\u4029\0\u4072\0\u40bb\0\u4104\0\u414d\0\u4196\0\u41df\0\u4228"+ - "\0\u4271\0\u42ba\0\u4303\0\u434c\0\u4395\0\u43de\0\u4427\0\u4470"+ - "\0\u44b9\0\u4502\0\u454b\0\u4594\0\u45dd\0\u4626\0\u466f\0\u46b8"+ - "\0\u4701\0\u474a\0\u4793\0\u47dc\0\u4825\0\u486e\0\u48b7\0\u4900"+ - "\0\u4949\0\u4992\0\u49db\0\u4a24\0\u4a6d\0\u4ab6\0\u4aff\0\u4b48"+ - "\0\u4b91\0\u4bda\0\u4c23\0\u4c6c\0\u4cb5\0\u4cfe\0\u4d47\0\u4d90"+ - "\0\u4dd9\0\u4e22\0\u4e6b\0\u4eb4\0\u4efd\0\u4f46\0\u4f8f\0\u4fd8"+ - "\0\u5021\0\u506a\0\u50b3\0\u50fc\0\u5145\0\u518e\0\u51d7\0\u5220"+ - "\0\u5269\0\u52b2\0\u52fb\0\u5344\0\u538d\0\u53d6\0\u541f\0\u5468"+ - "\0\u54b1\0\u54fa\0\u5543\0\u20cb\0\u558c\0\u55d5\0\u561e\0\u5667"+ - "\0\u56b0\0\u56f9\0\u5742\0\u578b\0\u57d4\0\u581d\0\u5866\0\u58af"+ - "\0\u58f8\0\u5941\0\u598a\0\u59d3\0\u5a1c\0\u5a65\0\u5aae\0\u5af7"+ - "\0\u5b40\0\u5b89\0\u5bd2\0\u5c1b\0\u5c64\0\u5cad\0\u5cf6\0\u5d3f"+ - "\0\u5d88\0\u5dd1\0\u5e1a\0\u5e63\0\u5eac\0\u5ef5\0\u5f3e\0\u5f87"+ - "\0\u5fd0\0\u6019\0\u6062\0\u60ab\0\u60f4\0\u613d\0\u6186\0\u61cf"+ - "\0\u6218\0\u6261\0\u62aa\0\u62f3\0\u633c\0\u6385\0\u63ce\0\u6417"+ - "\0\u6460\0\u64a9\0\u64f2\0\u653b\0\u6584\0\u65cd\0\u6616\0\u665f"+ - "\0\u66a8\0\u66f1\0\u673a\0\u6783\0\u67cc\0\u6815\0\u685e\0\u68a7"+ - "\0\u68f0\0\u6939\0\u6982\0\u69cb\0\u6a14\0\u6a5d\0\u6aa6\0\u6aef"+ - "\0\u6b38\0\u6b81\0\u6bca\0\u6c13\0\u6c5c\0\u6ca5\0\u6cee\0\u6d37"+ - "\0\u6d80\0\u6dc9\0\u6e12\0\u6e5b\0\u6ea4\0\u6eed\0\u6f36\0\u6f7f"+ - "\0\u6fc8\0\u7011\0\u705a\0\u70a3\0\u70ec\0\u7135\0\u717e\0\u71c7"+ - 
"\0\u7210\0\u03fe\0\u7259\0\u72a2\0\u72eb\0\u7334\0\u737d\0\u73c6"+ - "\0\u740f\0\u7458\0\u74a1\0\u74ea\0\u7533\0\u757c\0\u75c5\0\u760e"+ - "\0\u7657\0\u76a0\0\u76e9\0\u7732\0\u777b\0\u77c4\0\u780d\0\u7856"+ - "\0\u789f\0\u78e8\0\u7931\0\u797a\0\u79c3\0\u7a0c\0\u7a55\0\u7a9e"+ - "\0\u7ae7\0\u7b30\0\u7b79\0\u7bc2\0\u7c0b\0\u7c54\0\u7c9d\0\u7ce6"+ - "\0\u7d2f\0\u7d78\0\u7dc1\0\u7e0a\0\u7e53\0\u7e9c\0\u7ee5\0\u7f2e"+ - "\0\u7f77\0\u7fc0\0\u8009\0\u8052\0\u809b\0\u80e4\0\u812d\0\u8176"+ - "\0\u81bf\0\u8208\0\u8251\0\u829a\0\u82e3\0\u832c\0\u8375\0\u83be"+ - "\0\u8407\0\u8450\0\u8499\0\u84e2\0\u852b\0\u8574\0\u85bd\0\u8606"+ - "\0\u864f\0\u8698\0\u86e1\0\u872a\0\u8773\0\u87bc\0\u8805\0\u884e"+ - "\0\u8897\0\u88e0\0\u8929\0\u8972\0\u89bb\0\u8a04\0\u8a4d\0\u8a96"+ - "\0\u8adf\0\u8b28\0\u8b71\0\u8bba\0\u8c03\0\u8c4c\0\u8c95\0\u8cde"+ - "\0\u8d27\0\u8d70\0\u8db9\0\u8e02\0\u8e4b\0\u8e94\0\u8edd\0\u8f26"+ - "\0\u8f6f\0\u8fb8\0\u9001\0\u904a\0\u9093\0\u90dc\0\u9125\0\u916e"+ - "\0\u91b7\0\u9200\0\u9249\0\u9292\0\u92db\0\u9324\0\u936d\0\u93b6"+ - "\0\u93ff\0\u9448\0\u9491\0\u94da\0\u9523\0\u956c\0\u95b5\0\u95fe"+ - "\0\u9647\0\u9690\0\u96d9\0\u9722\0\u976b\0\u97b4\0\u97fd\0\u9846"+ - "\0\u988f\0\u98d8\0\u9921\0\u996a\0\u99b3\0\u99fc\0\u9a45\0\u9a8e"+ - "\0\u9ad7\0\u9b20\0\u9b69\0\u9bb2\0\u9bfb\0\u9c44\0\u9c8d\0\u9cd6"+ - "\0\u9d1f\0\u9d68\0\u9db1\0\u9dfa\0\u9e43\0\u9e8c\0\u9ed5\0\u9f1e"+ - "\0\u9f67\0\u9fb0\0\u9ff9\0\ua042\0\ua08b\0\ua0d4\0\ua11d\0\ua166"+ - "\0\ua1af\0\ua1f8\0\ua241\0\ua28a\0\ua2d3\0\ua31c\0\ua365\0\ua3ae"+ - "\0\ua3f7\0\ua440\0\ua489\0\ua4d2\0\ua51b\0\ua564\0\ua5ad\0\ua5f6"+ - "\0\ua63f\0\ua688\0\ua6d1\0\ua71a\0\ua763\0\ua7ac\0\ua7f5\0\ua83e"+ - "\0\ua887\0\ua8d0\0\ua919\0\ua962\0\ua9ab\0\ua9f4\0\uaa3d\0\uaa86"+ - "\0\uaacf\0\uab18\0\uab61\0\uabaa\0\uabf3\0\uac3c\0\uac85\0\uacce"+ - "\0\uad17\0\uad60\0\uada9\0\uadf2\0\uae3b\0\uae84\0\uaecd\0\uaf16"+ - "\0\uaf5f\0\uafa8\0\uaff1\0\ub03a\0\ub083\0\ub0cc\0\ub115\0\ub15e"+ - "\0\ub1a7\0\ub1f0\0\ub239\0\ub282\0\ub2cb\0\ub314\0\ub35d\0\ub3a6"+ - "\0\ub3ef\0\ub438\0\ub481\0\ub4ca\0\ub513\0\ub55c\0\ub5a5\0\ub5ee"+ - "\0\ub637\0\ub680\0\ub6c9\0\ub712\0\ub75b\0\ub7a4\0\ub7ed\0\ub836"+ - "\0\ub87f\0\ub8c8\0\ub911\0\ub95a\0\ub9a3\0\ub9ec\0\uba35\0\uba7e"+ - "\0\ubac7\0\ubb10\0\ubb59\0\ubba2\0\ubbeb\0\ubc34\0\ubc7d\0\ubcc6"+ - "\0\ubd0f\0\ubd58\0\ubda1\0\ubdea\0\ube33\0\ube7c\0\ubec5\0\ubf0e"+ - "\0\ubf57\0\ubfa0\0\ubfe9\0\uc032\0\uc07b\0\uc0c4\0\uc10d\0\uc156"+ - "\0\uc19f\0\uc1e8\0\uc231\0\uc27a\0\uc2c3\0\uc30c\0\uc355\0\uc39e"+ - "\0\uc3e7\0\uc430\0\uc479\0\uc4c2\0\uc50b\0\uc554\0\uc59d\0\uc5e6"+ - "\0\uc62f\0\uc678\0\uc6c1\0\uc70a\0\uc753\0\uc79c\0\uc7e5\0\uc82e"+ - "\0\uc877\0\uc8c0\0\uc909\0\uc952\0\uc99b\0\uc9e4\0\uca2d\0\uca76"+ - "\0\ucabf\0\ucb08\0\ucb51\0\ucb9a\0\ucbe3\0\ucc2c\0\ucc75\0\uccbe"+ - "\0\ucd07\0\ucd50\0\ucd99\0\ucde2\0\uce2b\0\uce74\0\ucebd\0\ucf06"+ - "\0\ucf4f\0\ucf98\0\ucfe1\0\ud02a\0\ud073\0\ud0bc\0\ud105\0\ud14e"+ - "\0\ud197\0\ud1e0\0\ud229\0\ud272\0\ud2bb\0\ud304\0\ud34d\0\ud396"+ - "\0\ud3df\0\ud428\0\ud471\0\ud4ba\0\ud503\0\ud54c\0\ud595\0\ud5de"+ - "\0\ud627\0\ud670\0\ud6b9\0\ud702\0\ud74b\0\ud794\0\ud7dd\0\ud826"+ - "\0\ud86f\0\ud8b8\0\ud901\0\ud94a\0\ud993\0\ud9dc\0\uda25\0\uda6e"+ - "\0\udab7\0\udb00\0\udb49\0\udb92\0\udbdb\0\udc24\0\udc6d\0\udcb6"+ - "\0\udcff\0\udd48\0\udd91\0\uddda\0\ude23\0\ude6c\0\udeb5\0\udefe"+ - "\0\udf47\0\udf90\0\udfd9\0\ue022\0\ue06b\0\ue0b4\0\ue0fd\0\ue146"+ - "\0\ue18f\0\ue1d8\0\ue221\0\ue26a\0\ue2b3\0\ue2fc\0\ue345\0\ue38e"+ - 
"\0\ue3d7\0\ue420\0\ue469\0\ue4b2\0\ue4fb\0\ue544\0\ue58d\0\ue5d6"+ - "\0\ue61f\0\ue668\0\ue6b1\0\ue6fa\0\ue743\0\ue78c\0\ue7d5\0\ue81e"+ - "\0\ue867\0\ue8b0\0\ue8f9\0\ue942\0\ue98b\0\ue9d4\0\uea1d\0\uea66"+ - "\0\ueaaf\0\ueaf8\0\ueb41\0\ueb8a\0\uebd3\0\uec1c\0\uec65\0\uecae"+ - "\0\uecf7\0\ued40\0\ued89\0\uedd2\0\uee1b\0\uee64\0\ueead\0\ueef6"+ - "\0\uef3f\0\uef88\0\uefd1\0\uf01a\0\uf063\0\uf0ac\0\uf0f5\0\uf13e"+ - "\0\uf187\0\uf1d0\0\uf219\0\uf262\0\uf2ab\0\uf2f4\0\uf33d\0\uf386"+ - "\0\uf3cf\0\uf418\0\uf461\0\uf4aa\0\uf4f3\0\uf53c\0\uf585\0\uf5ce"+ - "\0\uf617\0\uf660\0\uf6a9\0\uf6f2\0\uf73b\0\uf784\0\uf7cd\0\uf816"+ - "\0\uf85f\0\uf8a8\0\uf8f1\0\uf93a\0\uf983\0\uf9cc\0\ufa15\0\ufa5e"+ - "\0\ufaa7\0\ufaf0\0\ufb39\0\ufb82\0\ufbcb\0\ufc14\0\ufc5d\0\ufca6"+ - "\0\ufcef\0\ufd38\0\ufd81\0\ufdca\0\ufe13\0\ufe5c\0\ufea5\0\ufeee"+ - "\0\uff37\0\uff80\0\uffc9\1\22\1\133\1\244\1\355\1\u0136"+ - "\1\u017f\1\u01c8\1\u0211\1\u025a\1\u02a3\1\u02ec\1\u0335\1\u037e"+ - "\1\u03c7\1\u0410\1\u0459\1\u04a2\1\u04eb\1\u0534\1\u057d\1\u05c6"+ - "\1\u060f\1\u0658\1\u06a1\1\u06ea\1\u0733\1\u077c\1\u07c5\1\u080e"+ - "\1\u0857\1\u08a0\1\u08e9\1\u0932\1\u097b\1\u09c4\1\u0a0d\1\u0a56"+ - "\1\u0a9f\1\u0ae8\1\u0b31\1\u0b7a\1\u0bc3\1\u0c0c\1\u0c55\1\u0c9e"+ - "\1\u0ce7\1\u0d30\1\u0d79\1\u0dc2\1\u0e0b\1\u0e54\1\u0e9d\1\u0ee6"+ - "\1\u0f2f\1\u0f78\1\u0fc1\1\u100a\1\u1053\1\u109c\1\u10e5\1\u112e"+ - "\1\u1177\1\u11c0\1\u1209\1\u1252\1\u129b\1\u12e4\1\u132d\1\u1376"+ - "\1\u13bf\1\u1408\1\u1451\1\u149a\1\u14e3\1\u152c\1\u1575\1\u15be"+ - "\1\u1607\1\u1650\1\u1699\1\u16e2\1\u172b\1\u1774\1\u17bd\1\u1806"+ - "\1\u184f\1\u1898\1\u18e1\1\u192a\1\u1973\1\u19bc\1\u1a05\1\u1a4e"+ - "\1\u1a97\1\u1ae0\1\u1b29\1\u1b72\1\u1bbb\1\u1c04\1\u1c4d\1\u1c96"+ - "\1\u1cdf\1\u1d28\1\u1d71\1\u1dba\1\u1e03\1\u1e4c\1\u1e95\1\u1ede"+ - "\1\u1f27\1\u1f70\1\u1fb9\1\u2002\1\u204b\1\u2094\1\u20dd\1\u2126"+ - "\1\u216f\1\u21b8\1\u2201\1\u224a\1\u2293\1\u22dc\1\u2325\1\u236e"+ - "\1\u23b7\1\u2400\1\u2449\1\u2492\1\u24db\1\u2524\1\u256d\1\u25b6"+ - "\1\u25ff\1\u2648\1\u2691\1\u26da\1\u2723\1\u276c\1\u27b5\1\u27fe"+ - "\1\u2847\1\u2890\1\u28d9\1\u2922\1\u296b\1\u29b4\1\u29fd\1\u2a46"+ - "\1\u2a8f\1\u2ad8\1\u2b21\1\u2b6a\1\u2bb3\1\u2bfc\1\u2c45\1\u2c8e"+ - "\1\u2cd7\1\u2d20\1\u2d69\1\u2db2\1\u2dfb\1\u2e44\1\u2e8d\1\u2ed6"+ - "\1\u2f1f\1\u2f68\1\u2fb1\1\u2ffa\1\u3043\1\u308c\1\u30d5\1\u311e"+ - "\1\u3167\1\u31b0\1\u31f9\1\u3242\1\u328b\1\u32d4\1\u331d\1\u3366"+ - "\1\u33af\1\u33f8\1\u3441\1\u348a\1\u34d3\1\u351c\1\u3565\1\u35ae"+ - "\1\u35f7\1\u3640\1\u3689\1\u36d2\1\u371b\1\u3764\1\u37ad\1\u37f6"+ - "\1\u383f\1\u3888\1\u38d1\1\u391a\1\u3963\1\u39ac\1\u39f5\1\u3a3e"+ - "\1\u3a87\1\u3ad0\1\u3b19\1\u3b62\1\u3bab"; + "\0\0\0\242\0\u0144\0\u01e6\0\u0288\0\u032a\0\u03cc\0\u046e"+ + "\0\u0510\0\u05b2\0\u0654\0\u06f6\0\u0798\0\u083a\0\u08dc\0\u097e"+ + "\0\u0a20\0\u0ac2\0\u0b64\0\u0c06\0\u0ca8\0\u0d4a\0\u0dec\0\u0e8e"+ + "\0\u0f30\0\242\0\242\0\u0fd2\0\u1074\0\u1116\0\u11b8\0\u125a"+ + "\0\u12fc\0\u139e\0\u1440\0\u14e2\0\u1584\0\u0144\0\u01e6\0\u0288"+ + "\0\u032a\0\u03cc\0\u1626\0\u16c8\0\u176a\0\u180c\0\u06f6\0\u18ae"+ + "\0\u1950\0\u19f2\0\u1a94\0\u1b36\0\u1bd8\0\u1c7a\0\u0510\0\u05b2"+ + "\0\u1d1c\0\u1dbe\0\u1e60\0\u1f02\0\u1fa4\0\u2046\0\u20e8\0\u218a"+ + "\0\u222c\0\u22ce\0\u2370\0\u2412\0\u24b4\0\u2556\0\u25f8\0\u269a"+ + "\0\u273c\0\u0e8e\0\u27de\0\u0fd2\0\u2880\0\u2922\0\u29c4\0\u2a66"+ + "\0\u2b08\0\u2baa\0\u2c4c\0\u2cee\0\u2d90\0\u2e32\0\u2ed4\0\u2f76"+ + "\0\u3018\0\u30ba\0\u315c\0\u31fe\0\u1440\0\u32a0\0\u3342\0\u1584"+ + 
"\0\u33e4\0\u3486\0\u3528\0\u35ca\0\u366c\0\u370e\0\u37b0\0\u3852"+ + "\0\u38f4\0\u3996\0\u3a38\0\u3ada\0\u3b7c\0\u3c1e\0\u3cc0\0\u3d62"+ + "\0\u3e04\0\u3ea6\0\u3f48\0\u3fea\0\u408c\0\u412e\0\u41d0\0\u4272"+ + "\0\u4314\0\u43b6\0\u4458\0\u44fa\0\u459c\0\u463e\0\u46e0\0\u4782"+ + "\0\u4824\0\u48c6\0\u4968\0\u4a0a\0\u4aac\0\u4b4e\0\u4bf0\0\u4c92"+ + "\0\u4d34\0\u4dd6\0\u4e78\0\u4f1a\0\u4fbc\0\u505e\0\u5100\0\u51a2"+ + "\0\u5244\0\u52e6\0\u5388\0\u542a\0\u54cc\0\u556e\0\u5610\0\u56b2"+ + "\0\u5754\0\u57f6\0\u5898\0\u593a\0\u59dc\0\u5a7e\0\u5b20\0\u5bc2"+ + "\0\u5c64\0\u5d06\0\u5da8\0\u5e4a\0\u5eec\0\u5f8e\0\u6030\0\u60d2"+ + "\0\u6174\0\u6216\0\u62b8\0\u635a\0\u63fc\0\u649e\0\u6540\0\u65e2"+ + "\0\u6684\0\u6726\0\u67c8\0\u686a\0\u690c\0\u69ae\0\u6a50\0\u6af2"+ + "\0\u6b94\0\u6c36\0\u6cd8\0\u6d7a\0\u6e1c\0\u6ebe\0\u6f60\0\u7002"+ + "\0\u70a4\0\u7146\0\u71e8\0\u728a\0\u732c\0\u73ce\0\u7470\0\u7512"+ + "\0\u75b4\0\u7656\0\u76f8\0\u779a\0\u783c\0\u78de\0\u7980\0\u7a22"+ + "\0\242\0\u7ac4\0\u7b66\0\u7c08\0\u7caa\0\u7d4c\0\u7dee\0\u7e90"+ + "\0\u7f32\0\u7fd4\0\u8076\0\u8118\0\u81ba\0\u825c\0\u82fe\0\u83a0"+ + "\0\u8442\0\u84e4\0\u8586\0\u8628\0\u86ca\0\u876c\0\u880e\0\u88b0"+ + "\0\u8952\0\u89f4\0\u8a96\0\u8b38\0\u8bda\0\u8c7c\0\u8d1e\0\u8dc0"+ + "\0\u8e62\0\u8f04\0\u8fa6\0\u9048\0\u90ea\0\u918c\0\u922e\0\u92d0"+ + "\0\u9372\0\u9414\0\u94b6\0\u9558\0\u95fa\0\u969c\0\u973e\0\u97e0"+ + "\0\u9882\0\u9924\0\u99c6\0\u9a68\0\u9b0a\0\u9bac\0\u9c4e\0\u9cf0"+ + "\0\u9d92\0\u9e34\0\u9ed6\0\u9f78\0\ua01a\0\ua0bc\0\ua15e\0\ua200"+ + "\0\ua2a2\0\ua344\0\ua3e6\0\ua488\0\ua52a\0\ua5cc\0\ua66e\0\ua710"+ + "\0\ua7b2\0\ua854\0\ua8f6\0\ua998\0\uaa3a\0\uaadc\0\uab7e\0\uac20"+ + "\0\uacc2\0\uad64\0\uae06\0\uaea8\0\uaf4a\0\uafec\0\ub08e\0\ub130"+ + "\0\ub1d2\0\ub274\0\ub316\0\ub3b8\0\ub45a\0\ub4fc\0\ub59e\0\ub640"+ + "\0\ub6e2\0\ub784\0\ub826\0\ub8c8\0\ub96a\0\uba0c\0\ubaae\0\ubb50"+ + "\0\ubbf2\0\ubc94\0\ubd36\0\ubdd8\0\ube7a\0\ubf1c\0\ubfbe\0\uc060"+ + "\0\uc102\0\uc1a4\0\uc246\0\uc2e8\0\uc38a\0\uc42c\0\uc4ce\0\uc570"+ + "\0\uc612\0\uc6b4\0\uc756\0\uc7f8\0\uc89a\0\uc93c\0\uc9de\0\uca80"+ + "\0\ucb22\0\ucbc4\0\ucc66\0\ucd08\0\ucdaa\0\uce4c\0\uceee\0\ucf90"+ + "\0\ud032\0\ud0d4\0\ud176\0\ud218\0\ud2ba\0\ud35c\0\ud3fe\0\ud4a0"+ + "\0\ud542\0\ud5e4\0\ud686\0\ud728\0\ud7ca\0\ud86c\0\ud90e\0\ud9b0"+ + "\0\uda52\0\udaf4\0\udb96\0\udc38\0\udcda\0\udd7c\0\ude1e\0\udec0"+ + "\0\udf62\0\ue004\0\ue0a6\0\ue148\0\ue1ea\0\ue28c\0\ue32e\0\ue3d0"+ + "\0\ue472\0\ue514\0\ue5b6\0\ue658\0\ue6fa\0\ue79c\0\ue83e\0\ue8e0"+ + "\0\ue982\0\uea24\0\ueac6\0\ueb68\0\uec0a\0\uecac\0\ued4e\0\uedf0"+ + "\0\u7980\0\uee92\0\uef34\0\uefd6\0\uf078\0\uf11a\0\uf1bc\0\uf25e"+ + "\0\uf300\0\uf3a2\0\uf444\0\uf4e6\0\uf588\0\uf62a\0\uf6cc\0\uf76e"+ + "\0\uf810\0\uf8b2\0\uf954\0\uf9f6\0\ufa98\0\ufb3a\0\ufbdc\0\ufc7e"+ + "\0\ufd20\0\ufdc2\0\ufe64\0\uff06\0\uffa8\1\112\1\354\1\u018e"+ + "\1\u0230\1\u02d2\1\u0374\1\u0416\1\u04b8\1\u055a\1\u05fc\1\u069e"+ + "\1\u0740\1\u07e2\1\u0884\1\u0926\1\u09c8\1\u0a6a\1\u0b0c\1\u0bae"+ + "\1\u0c50\1\u0cf2\1\u0d94\1\u0e36\1\u0ed8\1\u0f7a\1\u101c\1\u10be"+ + "\1\u1160\1\u1202\1\u12a4\1\u1346\1\u13e8\1\u148a\1\u152c\1\u15ce"+ + "\1\u1670\1\u1712\1\u17b4\1\u1856\1\u18f8\1\u199a\1\u1a3c\1\u1ade"+ + "\1\u1b80\1\u1c22\1\u1cc4\1\u1d66\1\u1e08\1\u1eaa\1\u1f4c\1\u1fee"+ + "\1\u2090\1\u2132\1\u21d4\1\u2276\1\u2318\1\u23ba\1\u245c\1\u24fe"+ + "\1\u25a0\1\u2642\1\u26e4\1\u2786\1\u2828\1\u28ca\1\u296c\1\u2a0e"+ + "\1\u2ab0\1\u2b52\1\u2bf4\1\u2c96\1\u2d38\1\u2dda\0\u14e2\1\u2e7c"+ + 
"\1\u2f1e\1\u2fc0\1\u3062\1\u3104\1\u31a6\1\u3248\1\u32ea\1\u338c"+ + "\1\u342e\1\u34d0\1\u3572\1\u3614\1\u36b6\1\u3758\1\u37fa\1\u389c"+ + "\1\u393e\1\u39e0\1\u3a82\1\u3b24\1\u3bc6\1\u3c68\1\u3d0a\1\u3dac"+ + "\1\u3e4e\1\u3ef0\1\u3f92\1\u4034\1\u40d6\1\u4178\1\u421a\1\u42bc"+ + "\1\u435e\1\u4400\1\u44a2\1\u4544\1\u45e6\1\u4688\1\u472a\1\u47cc"+ + "\1\u486e\1\u4910\1\u49b2\1\u4a54\1\u4af6\1\u4b98\1\u4c3a\1\u4cdc"+ + "\1\u4d7e\1\u4e20\1\u4ec2\1\u4f64\1\u5006\1\u50a8\1\u514a\1\u51ec"+ + "\1\u528e\1\u5330\1\u53d2\1\u5474\1\u5516\1\u55b8\1\u565a\1\u56fc"+ + "\1\u579e\1\u5840\1\u58e2\1\u5984\1\u5a26\1\u5ac8\1\u5b6a\1\u5c0c"+ + "\1\u5cae\1\u5d50\1\u5df2\1\u5e94\1\u5f36\1\u5fd8\1\u607a\1\u611c"+ + "\1\u61be\1\u6260\1\u6302\1\u63a4\1\u6446\1\u64e8\1\u658a\1\u662c"+ + "\1\u66ce\1\u6770\1\u6812\1\u68b4\1\u6956\1\u69f8\1\u6a9a\1\u6b3c"+ + "\1\u6bde\1\u6c80\1\u6d22\1\u6dc4\1\u6e66\1\u6f08\1\u6faa\1\u704c"+ + "\1\u70ee\1\u7190\1\u7232\1\u72d4\1\u7376\1\u7418\1\u74ba\1\u755c"+ + "\1\u75fe\1\u76a0\1\u7742\1\u77e4\1\u7886\1\u7928\1\u79ca\1\u7a6c"+ + "\1\u7b0e\1\u7bb0\1\u7c52\1\u7cf4\1\u7d96\1\u7e38\1\u7eda\1\u7f7c"+ + "\1\u801e\1\u80c0\1\u8162\1\u8204\1\u82a6\1\u8348\1\u83ea\1\u848c"+ + "\1\u852e\1\u85d0\1\u8672\1\u8714\1\u87b6\1\u8858\1\u88fa\1\u899c"+ + "\1\u8a3e\1\u8ae0\1\u8b82\1\u8c24\1\u8cc6\1\u8d68\1\u8e0a\1\u8eac"+ + "\1\u8f4e\1\u8ff0\1\u9092\1\u9134\1\u91d6\1\u9278\1\u931a\1\u93bc"+ + "\1\u945e\1\u9500\1\u95a2\1\u9644\1\u96e6\1\u9788\1\u982a\1\u98cc"+ + "\1\u996e\1\u9a10\1\u9ab2\1\u9b54\1\u9bf6\1\u9c98\1\u9d3a\1\u9ddc"+ + "\1\u9e7e\1\u9f20\1\u9fc2\1\ua064\1\ua106\1\ua1a8\1\ua24a\1\ua2ec"+ + "\1\ua38e\1\ua430\1\ua4d2\1\ua574\1\ua616\1\ua6b8\1\ua75a\1\ua7fc"+ + "\1\ua89e\1\ua940\1\ua9e2\1\uaa84\1\uab26\1\uabc8\1\uac6a\1\uad0c"+ + "\1\uadae\1\uae50\1\uaef2\1\uaf94\1\ub036\1\ub0d8\1\ub17a\1\ub21c"+ + "\1\ub2be\1\ub360\1\ub402\1\ub4a4\1\ub546\1\ub5e8\1\ub68a\1\ub72c"+ + "\1\ub7ce\1\ub870\1\ub912\1\ub9b4\1\uba56\1\ubaf8\1\ubb9a\1\ubc3c"+ + "\1\ubcde\1\ubd80\1\ube22\1\ubec4\1\ubf66\1\uc008\1\uc0aa\1\uc14c"+ + "\1\uc1ee\1\uc290\1\uc332\1\uc3d4\1\uc476\1\uc518\1\uc5ba\1\uc65c"+ + "\1\uc6fe\1\uc7a0\1\uc842\1\uc8e4\1\uc986\1\uca28\1\ucaca\1\ucb6c"+ + "\1\ucc0e\1\uccb0\1\ucd52\1\ucdf4\1\uce96\1\ucf38\1\ucfda\1\ud07c"+ + "\1\ud11e\1\ud1c0\1\ud262\1\ud304\1\ud3a6\1\ud448\1\ud4ea\1\ud58c"+ + "\1\ud62e\1\ud6d0\1\ud772\1\ud814\1\ud8b6\1\ud958\1\ud9fa\1\uda9c"+ + "\1\udb3e\1\udbe0\1\udc82\1\udd24\1\uddc6\1\ude68\1\udf0a\1\udfac"+ + "\1\ue04e\1\ue0f0\1\ue192\1\ue234\1\ue2d6\1\ue378\1\ue41a\1\ue4bc"+ + "\1\ue55e\1\ue600\1\ue6a2\1\ue744\1\ue7e6\1\ue888\1\ue92a\1\ue9cc"+ + "\1\uea6e\1\ueb10\1\uebb2\1\uec54\1\uecf6\1\ued98\1\uee3a\1\ueedc"+ + "\1\uef7e\1\uf020\1\uf0c2\1\uf164\1\uf206\1\uf2a8\1\uf34a\1\uf3ec"+ + "\1\uf48e\1\uf530\1\uf5d2\1\uf674\1\uf716\1\uf7b8\1\uf85a\1\uf8fc"+ + "\1\uf99e\1\ufa40\1\ufae2\1\ufb84\1\ufc26\1\ufcc8\1\ufd6a\1\ufe0c"+ + "\1\ufeae\1\uff50\1\ufff2\2\224\2\u0136\2\u01d8\2\u027a\2\u031c"+ + "\2\u03be\2\u0460\2\u0502\2\u05a4\2\u0646\2\u06e8\2\u078a\2\u082c"+ + "\2\u08ce\2\u0970\2\u0a12\2\u0ab4\2\u0b56\2\u0bf8\2\u0c9a\2\u0d3c"+ + "\2\u0dde\2\u0e80\2\u0f22\2\u0fc4\2\u1066\2\u1108\2\u11aa\2\u124c"+ + "\2\u12ee\2\u1390\2\u1432\2\u14d4\2\u1576\2\u1618\2\u16ba\2\u175c"+ + "\2\u17fe\2\u18a0\2\u1942\2\u19e4\2\u1a86\2\u1b28\2\u1bca\2\u1c6c"+ + "\2\u1d0e\2\u1db0\2\u1e52\2\u1ef4\2\u1f96\2\u2038\2\u20da\2\u217c"+ + "\2\u221e\2\u22c0\2\u2362\2\u2404\2\u24a6\2\u2548\2\u25ea\2\u268c"+ + "\2\u272e\2\u27d0\2\u2872\2\u2914\2\u29b6\2\u2a58\2\u2afa\2\u2b9c"+ + 
"\2\u2c3e\2\u2ce0\2\u2d82\2\u2e24\2\u2ec6\2\u2f68\2\u300a\2\u30ac"+ + "\2\u314e\2\u31f0\2\u3292\2\u3334\2\u33d6\2\u3478\2\u351a\2\u35bc"+ + "\2\u365e\2\u3700\2\u37a2\2\u3844\2\u38e6\2\u3988\2\u3a2a\2\u3acc"+ + "\2\u3b6e\2\u3c10\2\u3cb2\2\u3d54\2\u3df6\2\u3e98\2\u3f3a\2\u3fdc"+ + "\2\u407e\2\u4120\2\u41c2\2\u4264\2\u4306\2\u43a8\2\u444a\2\u44ec"+ + "\2\u458e\2\u4630\2\u46d2\2\u4774\2\u4816\2\u48b8\2\u495a\2\u49fc"+ + "\2\u4a9e\2\u4b40\2\u4be2\2\u4c84\2\u4d26\2\u4dc8\2\u4e6a\2\u4f0c"+ + "\2\u4fae\2\u5050\2\u50f2\2\u5194\2\u5236\2\u52d8\2\u537a\2\u541c"+ + "\2\u54be\2\u5560\2\u5602\2\u56a4\2\u5746\2\u57e8\2\u588a\2\u592c"+ + "\2\u59ce\2\u5a70\2\u5b12\2\u5bb4\2\u5c56\2\u5cf8\2\u5d9a\2\u5e3c"+ + "\2\u5ede\2\u5f80\2\u6022\2\u60c4\2\u6166\2\u6208\2\u62aa\2\u634c"+ + "\2\u63ee\2\u6490\2\u6532\2\u65d4\2\u6676\2\u6718\2\u67ba\2\u685c"+ + "\2\u68fe\2\u69a0\2\u6a42\2\u6ae4\2\u6b86\2\u6c28\2\u6cca\2\u6d6c"+ + "\2\u6e0e\2\u6eb0\2\u6f52\2\u6ff4\2\u7096\2\u7138\2\u71da\2\u727c"+ + "\2\u731e\2\u73c0\2\u7462\2\u7504\2\u75a6\2\u7648\2\u76ea\2\u778c"+ + "\2\u782e\2\u78d0\2\u7972\2\u7a14\2\u7ab6\2\u7b58\2\u7bfa\2\u7c9c"+ + "\2\u7d3e\2\u7de0\2\u7e82\2\u7f24\2\u7fc6\2\u8068\2\u810a\2\u81ac"+ + "\2\u824e\2\u82f0\2\u8392\2\u8434\2\u84d6\2\u8578\2\u861a\2\u86bc"+ + "\2\u875e\2\u8800\2\u88a2\2\u8944\2\u89e6\2\u8a88\2\u8b2a\2\u8bcc"+ + "\2\u8c6e\2\u8d10\2\u8db2\2\u8e54\2\u8ef6\2\u8f98\2\u903a\2\u90dc"+ + "\2\u917e\2\u9220\2\u92c2\2\u9364\2\u9406\2\u94a8\2\u954a\2\u95ec"+ + "\2\u968e\2\u9730\2\u97d2\2\u9874\2\u9916\2\u99b8\2\u9a5a\2\u9afc"+ + "\2\u9b9e\2\u9c40\2\u9ce2\2\u9d84\2\u9e26\2\u9ec8\2\u9f6a\2\ua00c"+ + "\2\ua0ae\2\ua150\2\ua1f2\2\ua294\2\ua336\2\ua3d8\2\ua47a\2\ua51c"+ + "\2\ua5be\2\ua660\2\ua702\2\ua7a4\2\ua846\2\ua8e8\2\ua98a\2\uaa2c"+ + "\2\uaace\2\uab70\2\uac12\2\uacb4\2\uad56\2\uadf8\2\uae9a\2\uaf3c"+ + "\2\uafde\2\ub080\2\ub122\2\ub1c4\2\ub266\2\ub308\2\ub3aa\2\ub44c"+ + "\2\ub4ee\2\ub590\2\ub632\2\ub6d4\2\ub776\2\ub818\2\ub8ba\2\ub95c"+ + "\2\ub9fe\2\ubaa0\2\ubb42\2\ubbe4\2\ubc86\2\ubd28\2\ubdca\2\ube6c"+ + "\2\ubf0e\2\ubfb0\2\uc052\2\uc0f4\2\uc196\2\uc238\2\uc2da\2\uc37c"+ + "\2\uc41e\2\uc4c0\2\uc562\2\uc604\2\uc6a6\2\uc748\2\uc7ea\2\uc88c"+ + "\2\uc92e\2\uc9d0\2\uca72\2\ucb14\2\ucbb6\2\ucc58\2\uccfa\2\ucd9c"+ + "\2\uce3e\2\ucee0\2\ucf82\2\ud024\2\ud0c6\2\ud168\2\ud20a\2\ud2ac"+ + "\2\ud34e\2\ud3f0\2\ud492\2\ud534\2\ud5d6\2\ud678\2\ud71a\2\ud7bc"+ + "\2\ud85e\2\ud900\2\ud9a2\2\uda44\2\udae6\2\udb88\2\udc2a\2\udccc"+ + "\2\udd6e\2\ude10\2\udeb2\2\udf54\2\udff6\2\ue098\2\ue13a\2\ue1dc"+ + "\2\ue27e\2\ue320\2\ue3c2\2\ue464\2\ue506\2\ue5a8\2\ue64a\2\ue6ec"+ + "\2\ue78e\2\ue830\2\ue8d2\2\ue974\2\uea16\2\ueab8\2\ueb5a\2\uebfc"+ + "\2\uec9e\2\ued40\2\uede2\2\uee84"; private static int [] zzUnpackRowMap() { - int [] result = new int[1117]; + int [] result = new int[1204]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -419,1848 +447,2475 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\2\1\3\1\2\1\4\1\5\3\2\1\6\1\7"+ - "\6\10\1\11\16\10\1\12\4\10\1\7\1\13\2\14"+ - "\1\13\4\14\1\15\1\14\1\2\1\7\1\16\1\7"+ - "\1\2\2\7\1\2\3\7\1\17\2\2\1\7\1\20"+ - "\3\2\2\7\1\2\2\21\1\22\1\23\112\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\25\32\3"+ - "\1\0\12\24\1\25\1\0\1\26\3\0\1\25\20\0"+ - "\1\3\3\0\1\3\2\4\2\0\2\27\1\30\1\27"+ - "\32\3\1\0\12\4\2\0\1\30\2\0\2\27\6\0"+ - "\1\27\11\0\1\4\4\0\1\5\1\0\1\5\3\0"+ - "\1\26\50\0\1\26\24\0\1\5\3\0\1\3\1\31"+ - 
"\1\4\1\5\3\0\1\31\1\0\32\3\1\0\12\4"+ - "\2\0\1\31\24\0\1\31\13\0\1\32\45\33\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\6\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\35\32\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\10\10\1\42\6\10"+ - "\1\43\12\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\17\10\1\44\12\10\1\36"+ - "\12\37\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\1\3\2\4\2\0\2\27\1\30\1\45\32\10"+ - "\1\36\12\14\1\0\1\33\1\46\1\33\1\0\2\47"+ - "\1\34\3\33\2\0\1\27\1\33\4\0\2\33\2\0"+ - "\1\4\3\0\1\3\2\4\2\0\2\27\1\30\1\45"+ - "\32\10\1\36\12\50\1\0\1\33\1\46\1\33\1\0"+ - "\2\47\1\34\3\33\2\0\1\27\1\33\4\0\2\33"+ - "\2\0\1\4\3\0\1\3\2\4\2\0\2\27\1\30"+ - "\1\45\32\10\1\36\1\14\1\51\1\50\2\14\2\50"+ - "\2\14\1\50\1\0\1\33\1\46\1\33\1\0\2\47"+ - "\1\34\3\33\2\0\1\27\1\33\4\0\2\33\2\0"+ - "\1\4\3\0\1\3\1\31\1\4\1\5\3\0\1\31"+ - "\1\32\32\52\1\33\12\53\1\0\1\33\1\54\1\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\31\14\0\4\55\2\0\1\55\15\0\1\55"+ - "\6\0\12\55\1\56\42\0\65\57\1\60\1\57\1\61"+ - "\1\0\2\57\112\0\2\21\3\0\1\3\2\24\2\0"+ - "\2\62\1\26\1\62\32\3\1\0\12\24\2\0\1\26"+ - "\2\0\2\62\6\0\1\62\11\0\1\24\3\0\1\3"+ - "\1\25\7\0\32\3\42\0\1\25\3\0\1\3\1\26"+ - "\1\24\1\5\3\0\1\26\1\0\32\3\1\0\12\24"+ - "\2\0\1\26\24\0\1\26\4\0\1\27\1\4\41\0"+ - "\12\4\27\0\1\27\3\0\1\3\1\30\1\4\1\5"+ - "\3\0\1\30\1\0\32\3\1\0\12\4\2\0\1\30"+ - "\24\0\1\30\13\0\1\32\45\33\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\1\57\3\0\2\33"+ - "\17\0\32\63\1\0\12\63\13\0\1\64\17\0\1\3"+ - "\1\25\6\0\1\32\1\65\1\66\1\67\1\70\1\71"+ - "\1\72\1\73\1\74\1\75\1\76\1\77\1\100\1\101"+ - "\1\102\1\103\1\104\1\105\1\106\1\107\1\110\1\111"+ - "\1\112\1\113\1\114\1\115\1\116\1\33\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\1\57"+ - "\3\0\2\33\2\0\1\25\13\0\1\32\32\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\6\0\1\3\2\24\2\0\2\62"+ - "\1\26\1\120\32\10\1\36\12\37\1\0\1\33\1\40"+ - "\1\33\1\0\2\121\1\34\3\33\2\0\1\62\1\33"+ - "\4\0\2\33\2\0\1\24\3\0\1\3\1\26\1\24"+ - "\1\5\3\0\1\26\1\32\32\52\1\33\12\122\1\0"+ - "\1\33\1\40\1\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\26\3\0\1\3\1\25"+ - "\6\0\1\32\32\52\13\33\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\25"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\11\10\1\123\20\10\1\36\12\37\1\25\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\3\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\35\15\10\1\124"+ - "\14\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\17\10\1\125\12\10\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\4\0"+ - "\1\27\1\4\5\0\1\32\1\126\1\127\1\130\1\131"+ - "\1\132\1\133\1\134\1\135\1\136\1\137\1\140\1\141"+ - "\1\142\1\143\1\144\1\145\1\146\1\147\1\150\1\151"+ - "\1\152\1\153\1\154\1\155\1\156\1\157\1\33\1\160"+ - "\2\161\1\160\4\161\1\162\1\161\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\1\57\3\0\2\33"+ - "\2\0\1\27\3\0\1\3\1\30\1\4\1\5\3\0"+ - "\1\30\1\32\32\52\1\33\12\53\1\0\1\33\1\46"+ - "\1\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\30\4\0\1\27\1\4\5\0\1\32"+ - "\33\33\12\53\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\27\3\0\1\3"+ - 
"\2\4\2\0\2\27\1\30\1\45\32\10\1\36\12\163"+ - "\1\0\1\33\1\46\1\33\1\0\2\47\1\34\3\33"+ - "\2\0\1\27\1\33\4\0\2\33\2\0\1\4\3\0"+ - "\1\3\2\4\2\0\2\27\1\30\1\45\32\10\1\36"+ - "\2\50\1\163\2\50\2\163\2\50\1\163\1\0\1\33"+ - "\1\46\1\33\1\0\2\47\1\34\3\33\2\0\1\27"+ - "\1\33\4\0\2\33\2\0\1\4\3\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\164\32\52\1\33"+ - "\12\122\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\1\3\2\4\2\0\2\27\1\30\1\165\32\52"+ - "\1\33\12\53\1\0\1\33\1\46\1\33\1\0\2\47"+ - "\1\34\3\33\2\0\1\27\1\33\4\0\2\33\2\0"+ - "\1\4\14\0\4\166\2\0\1\166\15\0\1\166\6\0"+ - "\12\166\1\167\110\0\1\170\42\0\1\171\54\0\1\34"+ - "\33\0\74\57\6\0\1\62\1\24\41\0\12\24\27\0"+ - "\1\62\13\0\1\172\32\63\1\173\12\63\43\0\61\64"+ - "\1\0\1\174\4\64\1\175\1\0\3\64\5\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\1\10"+ - "\2\176\1\177\1\200\10\176\1\10\1\201\5\176\6\10"+ - "\1\36\12\37\1\25\1\33\1\40\1\33\1\0\1\33"+ - "\1\41\1\34\3\33\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\35\1\202\2\176\1\10\1\176\1\203\6\176"+ - "\4\10\1\176\1\10\2\176\1\10\1\176\1\10\3\176"+ - "\1\36\12\37\1\25\1\33\1\40\1\33\1\0\1\33"+ - "\1\41\1\34\3\33\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\35\3\10\1\176\1\10\1\176\4\10\1\176"+ - "\10\10\1\176\2\10\1\176\2\10\1\176\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\1\10\1\176\1\204\2\176\2\10\1\176\6\10\3\176"+ - "\11\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\3\10\1\176\1\10\1\176\10\10"+ - "\1\176\1\10\2\176\10\10\1\36\12\37\1\25\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\3\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\35\4\10\1\205"+ - "\5\10\1\176\17\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\4\10\2\176\2\10"+ - "\1\176\1\10\1\176\13\10\1\176\2\10\1\176\1\36"+ - "\12\37\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\1\176\1\10\3\176\1\206\14\176\2\10\2\176"+ - "\2\10\1\176\1\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\2\10\4\176\3\10"+ - "\2\176\1\207\1\176\1\10\2\176\12\10\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\2\176\2\10\1\176\3\10\1\176\5\10\3\176\3\10"+ - "\1\176\2\10\3\176\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\5\176\1\210\1\10"+ - "\1\176\1\211\7\176\1\212\3\176\1\10\1\176\1\10"+ - "\3\176\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\1\213\1\176\1\10\1\202\6\176"+ - "\3\10\1\176\2\10\1\176\2\10\1\176\6\10\1\36"+ - "\12\37\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\1\176\31\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\1\176\2\10\1\176"+ - "\1\214\1\10\2\176\1\10\3\176\2\10\2\176\1\10"+ 
- "\1\176\3\10\1\176\2\10\2\176\1\36\12\37\1\25"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\6\176"+ - "\1\10\5\176\3\10\2\176\2\10\7\176\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\1\10\2\176\1\211\1\215\3\176\1\10\3\176\1\10"+ - "\1\176\1\10\1\176\1\10\1\176\1\10\1\176\1\10"+ - "\3\176\1\10\1\176\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\1\176\6\10\1\176"+ - "\6\10\1\176\4\10\1\176\4\10\2\176\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\6\10\1\176\7\10\1\176\13\10\1\36\12\37\1\25"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\13\10"+ - "\1\216\16\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\1\176\11\10\1\176\6\10"+ - "\1\176\10\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\1\176\1\10\6\176\1\217"+ - "\1\10\2\176\2\10\2\176\1\10\1\176\1\10\6\176"+ - "\1\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\4\10\1\176\5\10\2\176\3\10"+ - "\2\176\10\10\1\176\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\3\10\1\176\1\10"+ - "\1\220\4\10\1\176\2\10\1\176\14\10\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\2\176\1\10\1\176\3\10\2\176\2\10\1\176\4\10"+ - "\1\176\11\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\3\10\1\176\13\10\1\176"+ - "\12\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\3\10\2\176\2\10\2\176\1\10"+ - "\2\176\1\10\1\176\3\10\1\176\1\10\1\176\1\10"+ - "\1\176\2\10\1\176\1\10\1\36\12\37\1\25\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\13\0\1\221\32\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\7\0\1\62\1\24\5\0"+ - "\1\32\1\126\1\127\1\130\1\131\1\132\1\133\1\134"+ - "\1\135\1\136\1\137\1\140\1\141\1\142\1\143\1\144"+ - "\1\145\1\146\1\147\1\150\1\151\1\152\1\153\1\154"+ - "\1\155\1\156\1\157\1\33\12\37\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\1\57\3\0\2\33"+ - "\2\0\1\62\4\0\1\62\1\24\5\0\1\32\33\33"+ - "\12\122\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\62\3\0\1\3\2\24"+ - "\2\0\2\62\1\26\1\222\32\52\1\33\12\122\1\0"+ - "\1\33\1\40\1\33\1\0\2\121\1\34\3\33\2\0"+ - "\1\62\1\33\4\0\2\33\2\0\1\24\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\3\10"+ - "\1\223\26\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\32\10\1\36\12\37\1\224"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\15\10"+ - 
"\1\225\14\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\13\0\1\221\1\117\2\226\1\227"+ - "\1\230\10\226\1\117\1\231\5\226\6\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\1\232\2\226\1\117\1\226"+ - "\1\233\6\226\4\117\1\226\1\117\2\226\1\117\1\226"+ - "\1\117\3\226\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\3\117\1\226\1\117\1\226\4\117\1\226\10\117\1\226"+ - "\2\117\1\226\2\117\1\226\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\1\117\1\226\1\234\2\226\2\117\1\226"+ - "\6\117\3\226\11\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\3\117\1\226\1\117\1\226\10\117\1\226\1\117"+ - "\2\226\10\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\4\117\1\235\5\117\1\226\17\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\4\117\2\226\2\117\1\226\1\117"+ - "\1\226\13\117\1\226\2\117\1\226\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\226\1\117\3\226\1\236\14\226"+ - "\2\117\2\226\2\117\1\226\1\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\2\117\4\226\3\117\2\226\1\237"+ - "\1\226\1\117\2\226\12\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\2\226\2\117\1\226\3\117\1\226\5\117"+ - "\3\226\3\117\1\226\2\117\3\226\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\5\226\1\240\1\117\1\226\1\241"+ - "\7\226\1\242\3\226\1\117\1\226\1\117\3\226\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\1\243\1\226\1\117"+ - "\1\232\6\226\3\117\1\226\2\117\1\226\2\117\1\226"+ - "\6\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\1\226"+ - "\31\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\1\226"+ - "\2\117\1\226\1\244\1\117\2\226\1\117\3\226\2\117"+ - "\2\226\1\117\1\226\3\117\1\226\2\117\2\226\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\6\226\1\117\5\226"+ - "\3\117\2\226\2\117\7\226\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\1\117\2\226\1\241\1\245\3\226\1\117"+ - "\3\226\1\117\1\226\1\117\1\226\1\117\1\226\1\117"+ - "\1\226\1\117\3\226\1\117\1\226\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\226\6\117\1\226\6\117\1\226"+ - "\4\117\1\226\4\117\2\226\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\6\117\1\226\7\117\1\226\13\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\13\117\1\246\16\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\226\11\117"+ - "\1\226\6\117\1\226\10\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\1\226\1\117\6\226\1\247\1\117\2\226"+ - "\2\117\2\226\1\117\1\226\1\117\6\226\1\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\4\117\1\226\5\117"+ - "\2\226\3\117\2\226\10\117\1\226\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\3\117\1\226\1\117\1\250\4\117"+ - "\1\226\2\117\1\226\14\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\2\226\1\117\1\226\3\117\2\226\2\117"+ - "\1\226\4\117\1\226\11\117\1\36\12\117\1\0\3\33"+ - 
"\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\3\117\1\226\13\117\1\226\12\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\3\117\2\226\2\117"+ - "\2\226\1\117\2\226\1\117\1\226\3\117\1\226\1\117"+ - "\1\226\1\117\1\226\2\117\1\226\1\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\6\0\1\3\2\4\2\0\2\27\1\30"+ - "\1\251\32\10\1\36\12\161\1\0\1\33\1\46\1\33"+ - "\1\0\2\47\1\34\3\33\2\0\1\27\1\33\4\0"+ - "\2\33\2\0\1\4\3\0\1\3\2\4\2\0\2\27"+ - "\1\30\1\251\32\10\1\36\12\252\1\0\1\33\1\46"+ - "\1\33\1\0\2\47\1\34\3\33\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\3\0\1\3\2\4\2\0"+ - "\2\27\1\30\1\251\32\10\1\36\1\161\1\253\1\252"+ - "\2\161\2\252\2\161\1\252\1\0\1\33\1\46\1\33"+ - "\1\0\2\47\1\34\3\33\2\0\1\27\1\33\4\0"+ - "\2\33\2\0\1\4\3\0\1\3\2\4\2\0\2\27"+ - "\1\30\1\254\32\10\1\36\12\163\1\0\1\33\1\46"+ - "\1\33\1\0\2\47\1\34\3\33\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\3\0\1\3\1\25\6\0"+ - "\1\32\32\52\13\33\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\1\57\3\0\2\33\2\0\1\25"+ - "\4\0\1\27\1\4\5\0\1\32\33\33\12\53\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\1\57"+ - "\3\0\2\33\2\0\1\27\14\0\4\255\2\0\1\255"+ - "\15\0\1\255\6\0\12\255\1\167\43\0\4\256\2\0"+ - "\1\256\15\0\1\256\6\0\12\256\1\257\43\0\4\260"+ - "\2\0\1\260\15\0\1\260\6\0\1\261\2\262\1\261"+ - "\4\262\1\263\1\262\14\0\1\264\26\0\46\33\1\0"+ - "\3\33\1\0\2\33\1\0\3\33\3\0\1\33\1\57"+ - "\3\0\2\33\17\0\1\265\1\266\1\267\1\270\1\271"+ - "\1\272\1\273\1\274\1\275\1\276\1\277\1\300\1\301"+ - "\1\302\1\303\1\304\1\305\1\306\1\307\1\310\1\311"+ - "\1\312\1\313\1\314\1\315\1\316\1\0\12\63\44\0"+ - "\32\63\1\173\12\63\43\0\74\64\5\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\317\32\10\1\36"+ - "\12\37\1\320\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\1\321\1\322\1\323\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\317\4\10\1\324\25\10\1\36\12\37"+ - "\1\320\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\317\15\10\1\101\14\10\1\36\12\37\1\320"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\317\10\10\1\101\21\10\1\36\12\37\1\320\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\1\321\1\322"+ - "\1\323\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\317"+ - "\17\10\1\176\12\10\1\36\12\37\1\320\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\1\321\1\322\1\323"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\317\5\10"+ - "\1\325\4\10\1\176\17\10\1\36\12\37\1\320\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\1\321\1\322"+ - "\1\323\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\20\10\1\176\11\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\7\10\1\176\22\10"+ - "\1\36\12\37\1\25\1\33\1\40\1\33\1\0\1\33"+ - "\1\41\1\34\3\33\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\35\27\10\1\176\2\10\1\36\12\37\1\25"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\317\6\10"+ - "\1\324\10\10\1\176\12\10\1\36\12\37\1\320\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\1\321\1\322"+ - "\1\323\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\317"+ - "\24\10\1\326\5\10\1\36\12\37\1\320\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\1\321\1\322\1\323"+ 
- "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\11\10"+ - "\1\176\20\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\317\16\10\1\327\13\10\1\36"+ - "\12\37\1\320\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\1\321\1\322\1\323\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\317\12\10\1\330\17\10\1\36\12\37"+ - "\1\320\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\317\5\10\1\176\24\10\1\36\12\37\1\320"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\317\1\331\31\10\1\36\12\37\1\320\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\1\321\1\322\1\323"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\32\10"+ - "\1\332\12\37\1\25\1\33\1\40\1\33\1\0\1\33"+ - "\1\41\1\34\3\33\3\0\1\33\4\0\2\33\2\0"+ - "\1\3\3\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\317\23\10\1\176\6\10\1\36\12\37\1\320"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\317\24\10\1\333\5\10\1\36\12\37\1\320\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\1\321\1\322"+ - "\1\323\3\0\1\33\4\0\2\33\2\0\1\3\13\0"+ - "\1\32\1\126\1\127\1\130\1\131\1\132\1\133\1\134"+ - "\1\135\1\136\1\137\1\140\1\141\1\142\1\143\1\144"+ - "\1\145\1\146\1\147\1\150\1\151\1\152\1\153\1\154"+ - "\1\155\1\156\1\157\1\33\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\1\57\3\0\2\33"+ - "\7\0\1\62\1\24\5\0\1\32\33\33\12\122\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\1\57"+ - "\3\0\2\33\2\0\1\62\3\0\2\3\1\24\1\0"+ - "\1\25\1\0\1\25\1\26\1\35\32\10\1\36\12\37"+ - "\1\334\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\1\3\1\25\7\0\32\3\24\0\1\335\15\0\1\25"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\16\10\1\336\13\10\1\36\12\37\1\337\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\13\0\1\340\32\117"+ - "\1\36\12\117\1\341\3\33\1\0\2\33\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\16\0\1\340"+ - "\4\117\1\342\25\117\1\36\12\117\1\341\3\33\1\0"+ - "\2\33\1\34\1\321\1\322\1\323\3\0\1\33\4\0"+ - "\2\33\16\0\1\340\15\117\1\142\14\117\1\36\12\117"+ - "\1\341\3\33\1\0\2\33\1\34\1\321\1\322\1\323"+ - "\3\0\1\33\4\0\2\33\16\0\1\340\10\117\1\142"+ - "\21\117\1\36\12\117\1\341\3\33\1\0\2\33\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\4\0\2\33\16\0"+ - "\1\340\17\117\1\226\12\117\1\36\12\117\1\341\3\33"+ - "\1\0\2\33\1\34\1\321\1\322\1\323\3\0\1\33"+ - "\4\0\2\33\16\0\1\340\5\117\1\343\4\117\1\226"+ - "\17\117\1\36\12\117\1\341\3\33\1\0\2\33\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\20\117\1\226\11\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\7\117\1\226\22\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\27\117\1\226\2\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\340\6\117\1\342\10\117\1\226"+ - "\12\117\1\36\12\117\1\341\3\33\1\0\2\33\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\4\0\2\33\16\0"+ - "\1\340\24\117\1\344\5\117\1\36\12\117\1\341\3\33"+ - "\1\0\2\33\1\34\1\321\1\322\1\323\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\11\117\1\226\20\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\340\16\117\1\345\13\117"+ - 
"\1\36\12\117\1\341\3\33\1\0\2\33\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\16\0\1\340"+ - "\12\117\1\346\17\117\1\36\12\117\1\341\3\33\1\0"+ - "\2\33\1\34\1\321\1\322\1\323\3\0\1\33\4\0"+ - "\2\33\16\0\1\340\5\117\1\226\24\117\1\36\12\117"+ - "\1\341\3\33\1\0\2\33\1\34\1\321\1\322\1\323"+ - "\3\0\1\33\4\0\2\33\16\0\1\340\1\347\31\117"+ - "\1\36\12\117\1\341\3\33\1\0\2\33\1\34\1\321"+ - "\1\322\1\323\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\332\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\340\23\117"+ - "\1\226\6\117\1\36\12\117\1\341\3\33\1\0\2\33"+ - "\1\34\1\321\1\322\1\323\3\0\1\33\4\0\2\33"+ - "\16\0\1\340\24\117\1\350\5\117\1\36\12\117\1\341"+ - "\3\33\1\0\2\33\1\34\1\321\1\322\1\323\3\0"+ - "\1\33\4\0\2\33\7\0\1\27\1\4\5\0\1\32"+ - "\1\126\1\127\1\130\1\131\1\132\1\133\1\134\1\135"+ - "\1\136\1\137\1\140\1\141\1\142\1\143\1\144\1\145"+ - "\1\146\1\147\1\150\1\151\1\152\1\153\1\154\1\155"+ - "\1\156\1\157\1\33\1\351\2\352\1\351\4\352\1\353"+ - "\1\352\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\1\57\3\0\2\33\2\0\1\27\3\0\1\3"+ - "\2\4\2\0\2\27\1\30\1\251\32\10\1\36\12\163"+ - "\1\0\1\33\1\46\1\33\1\0\2\47\1\34\3\33"+ - "\2\0\1\27\1\33\4\0\2\33\2\0\1\4\3\0"+ - "\1\3\2\4\2\0\2\27\1\30\1\251\32\10\1\36"+ - "\2\252\1\163\2\252\2\163\2\252\1\163\1\0\1\33"+ - "\1\46\1\33\1\0\2\47\1\34\3\33\2\0\1\27"+ - "\1\33\4\0\2\33\2\0\1\4\4\0\1\27\1\4"+ - "\5\0\1\32\1\126\1\127\1\130\1\131\1\132\1\133"+ - "\1\134\1\135\1\136\1\137\1\140\1\141\1\142\1\143"+ - "\1\144\1\145\1\146\1\147\1\150\1\151\1\152\1\153"+ - "\1\154\1\155\1\156\1\157\1\33\12\163\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\1\57\3\0"+ - "\2\33\2\0\1\27\14\0\4\354\2\0\1\354\15\0"+ - "\1\354\6\0\12\354\1\167\43\0\4\355\2\0\1\355"+ - "\15\0\1\355\6\0\12\355\1\356\43\0\4\357\2\0"+ - "\1\357\15\0\1\357\6\0\1\360\2\361\1\360\4\361"+ - "\1\362\1\361\14\0\1\264\27\0\4\363\2\0\1\363"+ - "\15\0\1\363\6\0\12\363\1\364\13\0\1\264\26\0"+ - "\1\365\4\363\2\0\1\363\15\0\1\363\6\0\12\366"+ - "\1\364\13\0\1\264\26\0\1\365\4\363\2\0\1\363"+ - "\15\0\1\363\6\0\12\367\1\364\13\0\1\264\26\0"+ - "\1\365\4\363\2\0\1\363\15\0\1\363\6\0\1\366"+ - "\1\370\1\367\2\366\2\367\2\366\1\367\1\364\13\0"+ - "\1\264\74\0\1\341\7\0\1\371\1\372\1\373\30\0"+ - "\1\172\1\63\2\374\1\375\1\376\10\374\1\63\1\377"+ - "\5\374\6\63\1\173\12\63\43\0\1\172\1\u0100\2\374"+ - "\1\63\1\374\1\u0101\6\374\4\63\1\374\1\63\2\374"+ - "\1\63\1\374\1\63\3\374\1\173\12\63\43\0\1\172"+ - "\3\63\1\374\1\63\1\374\4\63\1\374\10\63\1\374"+ - "\2\63\1\374\2\63\1\374\1\173\12\63\43\0\1\172"+ - "\1\63\1\374\1\u0102\2\374\2\63\1\374\6\63\3\374"+ - "\11\63\1\173\12\63\43\0\1\172\3\63\1\374\1\63"+ - "\1\374\10\63\1\374\1\63\2\374\10\63\1\173\12\63"+ - "\43\0\1\172\4\63\1\u0103\5\63\1\374\17\63\1\173"+ - "\12\63\43\0\1\172\4\63\2\374\2\63\1\374\1\63"+ - "\1\374\13\63\1\374\2\63\1\374\1\173\12\63\43\0"+ - "\1\172\1\374\1\63\3\374\1\u0104\14\374\2\63\2\374"+ - "\2\63\1\374\1\63\1\173\12\63\43\0\1\172\2\63"+ - "\4\374\3\63\2\374\1\u0105\1\374\1\63\2\374\12\63"+ - "\1\173\12\63\43\0\1\172\2\374\2\63\1\374\3\63"+ - "\1\374\5\63\3\374\3\63\1\374\2\63\3\374\1\173"+ - "\12\63\43\0\1\172\5\374\1\u0106\1\63\1\374\1\u0107"+ - "\7\374\1\u0108\3\374\1\63\1\374\1\63\3\374\1\173"+ - "\12\63\43\0\1\172\1\u0109\1\374\1\63\1\u0100\6\374"+ - "\3\63\1\374\2\63\1\374\2\63\1\374\6\63\1\173"+ - "\12\63\43\0\1\172\1\374\31\63\1\173\12\63\43\0"+ - "\1\172\1\374\2\63\1\374\1\u010a\1\63\2\374\1\63"+ - "\3\374\2\63\2\374\1\63\1\374\3\63\1\374\2\63"+ - "\2\374\1\173\12\63\43\0\1\172\6\374\1\63\5\374"+ - 
"\3\63\2\374\2\63\7\374\1\173\12\63\43\0\1\172"+ - "\1\63\2\374\1\u0107\1\u010b\3\374\1\63\3\374\1\63"+ - "\1\374\1\63\1\374\1\63\1\374\1\63\1\374\1\63"+ - "\3\374\1\63\1\374\1\173\12\63\43\0\1\172\1\374"+ - "\6\63\1\374\6\63\1\374\4\63\1\374\4\63\2\374"+ - "\1\173\12\63\43\0\1\172\6\63\1\374\7\63\1\374"+ - "\13\63\1\173\12\63\43\0\1\172\13\63\1\u010c\16\63"+ - "\1\173\12\63\43\0\1\172\1\374\11\63\1\374\6\63"+ - "\1\374\10\63\1\173\12\63\43\0\1\172\1\374\1\63"+ - "\6\374\1\u010d\1\63\2\374\2\63\2\374\1\63\1\374"+ - "\1\63\6\374\1\63\1\173\12\63\43\0\1\172\4\63"+ - "\1\374\5\63\2\374\3\63\2\374\10\63\1\374\1\173"+ - "\12\63\43\0\1\172\3\63\1\374\1\63\1\u010e\4\63"+ - "\1\374\2\63\1\374\14\63\1\173\12\63\43\0\1\172"+ - "\2\374\1\63\1\374\3\63\2\374\2\63\1\374\4\63"+ - "\1\374\11\63\1\173\12\63\43\0\1\172\3\63\1\374"+ - "\13\63\1\374\12\63\1\173\12\63\43\0\1\172\3\63"+ - "\2\374\2\63\2\374\1\63\2\374\1\63\1\374\3\63"+ - "\1\374\1\63\1\374\1\63\1\374\2\63\1\374\1\63"+ - "\1\173\12\63\33\0\1\3\1\25\6\0\1\32\1\65"+ - "\1\66\1\67\1\70\1\71\1\72\1\73\1\74\1\75"+ - "\1\76\1\77\1\100\1\101\1\102\1\103\1\104\1\105"+ - "\1\106\1\107\1\110\1\111\1\112\1\113\1\114\1\115"+ - "\1\116\1\33\12\117\1\341\3\33\1\0\2\33\1\34"+ - "\1\321\1\322\1\323\3\0\1\33\1\57\3\0\2\33"+ - "\2\0\1\25\3\0\1\3\1\25\7\0\32\3\1\0"+ - "\12\u010f\27\0\1\25\13\0\1\u0110\45\321\1\371\2\321"+ - "\1\u0111\1\371\2\321\1\u0112\2\321\1\323\2\0\1\371"+ - "\1\321\4\0\1\321\1\33\16\0\1\u0113\45\322\1\372"+ - "\2\322\1\u0114\1\0\2\33\1\u0115\1\321\1\322\1\323"+ - "\2\0\1\372\1\322\4\0\2\33\16\0\1\u0116\45\323"+ - "\1\373\2\323\1\u0117\1\373\2\323\1\u0118\2\323\1\33"+ - "\2\0\1\373\1\323\4\0\1\323\1\33\6\0\2\3"+ - "\1\24\1\0\1\25\1\0\1\25\1\26\1\35\5\10"+ - "\1\176\24\10\1\36\12\37\1\25\1\33\1\40\1\33"+ - "\1\0\1\33\1\41\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\2\0\1\3\3\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\15\10\1\176\14\10\1\36"+ - "\12\37\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\10\10\1\176\21\10\1\36\12\37\1\25\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\3\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\35\3\10\1\u0119"+ - "\26\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\3\0\2\3\1\24\1\0\1\25\1\0"+ - "\1\25\1\26\1\35\3\10\1\176\26\10\1\36\12\37"+ - "\1\25\1\33\1\40\1\33\1\0\1\33\1\41\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\2\0\1\3\3\0"+ - "\2\3\1\24\1\0\1\25\1\0\1\25\1\26\1\35"+ - "\27\10\1\u011a\2\10\1\36\12\37\1\25\1\33\1\40"+ - "\1\33\1\0\1\33\1\41\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\2\0\1\3\13\0\1\32\32\117\1\u011b"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\6\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\16\10\1\176\13\10\1\36"+ - "\12\37\1\25\1\33\1\40\1\33\1\0\1\33\1\41"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\2\0\1\3"+ - "\3\0\1\3\1\25\7\0\32\3\24\0\1\u011c\15\0"+ - "\1\25\72\0\1\u011d\21\0\2\3\1\24\1\0\1\25"+ - "\1\0\1\25\1\26\1\35\32\10\1\36\12\37\1\337"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\3\0\1\3"+ - "\1\25\7\0\32\3\24\0\1\u011e\15\0\1\25\13\0"+ - "\1\32\1\126\1\127\1\130\1\131\1\132\1\133\1\134"+ - "\1\135\1\136\1\137\1\140\1\141\1\142\1\143\1\144"+ - "\1\145\1\146\1\147\1\150\1\151\1\152\1\153\1\154"+ - "\1\155\1\156\1\157\1\33\12\117\1\341\3\33\1\0"+ - "\2\33\1\34\1\321\1\322\1\323\3\0\1\33\1\57"+ - "\3\0\2\33\52\0\12\u010f\43\0\1\221\5\117\1\226"+ - "\24\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - 
"\3\33\3\0\1\33\4\0\2\33\16\0\1\221\15\117"+ - "\1\226\14\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\10\117\1\226\21\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\3\117\1\u011f\26\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\3\117\1\226\26\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\27\117\1\u0120\2\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\16\117\1\226\13\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\6\0\1\3\2\4\2\0\2\27"+ - "\1\30\1\u0121\32\10\1\36\12\352\1\0\1\33\1\46"+ - "\1\33\1\0\2\47\1\34\3\33\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\3\0\1\3\2\4\2\0"+ - "\2\27\1\30\1\u0121\32\10\1\36\12\u0122\1\0\1\33"+ - "\1\46\1\33\1\0\2\47\1\34\3\33\2\0\1\27"+ - "\1\33\4\0\2\33\2\0\1\4\3\0\1\3\2\4"+ - "\2\0\2\27\1\30\1\u0121\32\10\1\36\1\352\1\u0123"+ - "\1\u0122\2\352\2\u0122\2\352\1\u0122\1\0\1\33\1\46"+ - "\1\33\1\0\2\47\1\34\3\33\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\61\0\1\167\43\0\4\u0124"+ - "\2\0\1\u0124\15\0\1\u0124\6\0\12\u0124\1\356\43\0"+ - "\4\u0125\2\0\1\u0125\15\0\1\u0125\6\0\12\u0125\1\u0126"+ - "\43\0\4\u0127\2\0\1\u0127\15\0\1\u0127\6\0\12\u0127"+ - "\1\u0128\13\0\1\264\26\0\1\365\4\u0127\2\0\1\u0127"+ - "\15\0\1\u0127\6\0\12\u0129\1\u0128\13\0\1\264\26\0"+ - "\1\365\4\u0127\2\0\1\u0127\15\0\1\u0127\6\0\12\u012a"+ - "\1\u0128\13\0\1\264\26\0\1\365\4\u0127\2\0\1\u0127"+ - "\15\0\1\u0127\6\0\1\u0129\1\u012b\1\u012a\2\u0129\2\u012a"+ - "\2\u0129\1\u012a\1\u0128\13\0\1\264\27\0\4\u012c\2\0"+ - "\1\u012c\15\0\1\u012c\6\0\12\u012c\1\364\13\0\1\264"+ - "\27\0\4\357\2\0\1\357\15\0\1\357\6\0\1\360"+ - "\2\361\1\360\4\361\1\362\1\361\77\0\1\u012d\2\u012e"+ - "\1\u012d\4\u012e\1\u012f\1\u012e\43\0\1\365\4\u012c\2\0"+ - "\1\u012c\15\0\1\u012c\6\0\12\u0130\1\364\13\0\1\264"+ - "\26\0\1\365\4\u012c\2\0\1\u012c\15\0\1\u012c\6\0"+ - "\12\u012c\1\364\13\0\1\264\26\0\1\365\4\u012c\2\0"+ - "\1\u012c\15\0\1\u012c\6\0\2\u0130\1\u012c\2\u0130\2\u012c"+ - "\2\u0130\1\u012c\1\364\13\0\1\264\26\0\51\371\1\u0131"+ - "\6\371\1\373\2\0\2\371\4\0\1\371\17\0\51\372"+ - "\1\u0132\3\0\1\372\1\371\1\372\1\373\2\0\2\372"+ - "\24\0\51\373\1\u0133\6\373\3\0\2\373\4\0\1\373"+ - "\17\0\1\u0134\32\63\1\173\12\63\43\0\1\u0134\4\63"+ - "\1\u0135\25\63\1\173\12\63\43\0\1\u0134\15\63\1\301"+ - "\14\63\1\173\12\63\43\0\1\u0134\10\63\1\301\21\63"+ - "\1\173\12\63\43\0\1\u0134\17\63\1\374\12\63\1\173"+ - "\12\63\43\0\1\u0134\5\63\1\u0136\4\63\1\374\17\63"+ - "\1\173\12\63\43\0\1\172\20\63\1\374\11\63\1\173"+ - "\12\63\43\0\1\172\7\63\1\374\22\63\1\173\12\63"+ - "\43\0\1\172\27\63\1\374\2\63\1\173\12\63\43\0"+ - "\1\u0134\6\63\1\u0135\10\63\1\374\12\63\1\173\12\63"+ - "\43\0\1\u0134\24\63\1\u0137\5\63\1\173\12\63\43\0"+ - "\1\172\11\63\1\374\20\63\1\173\12\63\43\0\1\u0134"+ - "\16\63\1\u0138\13\63\1\173\12\63\43\0\1\u0134\12\63"+ - "\1\u0139\17\63\1\173\12\63\43\0\1\u0134\5\63\1\374"+ - "\24\63\1\173\12\63\43\0\1\u0134\1\u013a\31\63\1\173"+ - "\12\63\43\0\1\172\32\63\1\u013b\12\63\43\0\1\u0134"+ - "\23\63\1\374\6\63\1\173\12\63\43\0\1\u0134\24\63"+ - "\1\u013c\5\63\1\173\12\63\77\0\12\u013d\10\0\1\371"+ - "\1\372\1\373\30\0\1\u0110\45\321\1\371\2\321\1\u0111"+ - "\1\371\2\321\1\u0112\2\321\1\323\2\0\1\371\1\321"+ - "\1\57\3\0\1\321\1\33\16\0\1\32\4\u013e\2\33"+ - "\1\u013e\15\33\1\u013e\6\33\12\u013e\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - 
"\51\371\1\u0131\6\371\1\373\1\64\1\0\2\371\4\0"+ - "\1\371\17\0\1\u0113\45\322\1\372\2\322\1\u0114\1\0"+ - "\2\33\1\u0115\1\321\1\322\1\323\2\0\1\372\1\322"+ - "\1\57\3\0\2\33\16\0\1\32\4\u013f\2\33\1\u013f"+ - "\15\33\1\u013f\6\33\12\u013f\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\51\372"+ - "\1\u0132\3\0\1\372\1\371\1\372\1\373\1\64\1\0"+ - "\2\372\24\0\1\u0116\45\323\1\373\2\323\1\u0117\1\373"+ - "\2\323\1\u0118\2\323\1\33\2\0\1\373\1\323\1\57"+ - "\3\0\1\323\1\33\16\0\1\32\4\u0140\2\33\1\u0140"+ - "\15\33\1\u0140\6\33\12\u0140\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\51\373"+ - "\1\u0133\6\373\1\0\1\64\1\0\2\373\4\0\1\373"+ - "\7\0\2\3\1\24\1\0\1\25\1\0\1\25\1\26"+ - "\1\35\20\10\1\u0141\11\10\1\36\12\37\1\25\1\33"+ - "\1\40\1\33\1\0\1\33\1\41\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\2\0\1\3\3\0\2\3\1\24"+ - "\1\0\1\25\1\0\1\25\1\26\1\35\3\10\1\211"+ - "\26\10\1\36\12\37\1\25\1\33\1\40\1\33\1\0"+ - "\1\33\1\41\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\2\0\1\3\13\0\1\32\2\117\1\u0142\2\117\1\u0143"+ - "\1\u0144\1\u0145\2\117\1\u0146\2\117\1\u0147\3\117\1\u0148"+ - "\1\u0149\1\u014a\1\117\1\u014b\1\u014c\1\117\1\u014d\1\u014e"+ - "\1\36\1\u014f\2\117\1\u0150\1\117\1\u0151\1\u0152\3\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\75\0\1\u0153\31\0\1\u0154\32\u0155\1\u0154"+ - "\12\u0155\1\u0156\2\u0154\1\u0157\3\u0154\1\u0158\3\0\1\u0159"+ - "\1\0\2\u0154\4\0\1\u0154\76\0\1\u015a\31\0\1\221"+ - "\20\117\1\u015b\11\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\3\117\1\241\26\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\7\0\1\27\1\4\5\0\1\32\1\126\1\127\1\130"+ - "\1\131\1\132\1\133\1\134\1\135\1\136\1\137\1\140"+ - "\1\141\1\142\1\143\1\144\1\145\1\146\1\147\1\150"+ - "\1\151\1\152\1\153\1\154\1\155\1\156\1\157\1\33"+ - "\1\u015c\2\u015d\1\u015c\4\u015d\1\u015e\1\u015d\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\1\57\3\0"+ - "\2\33\2\0\1\27\3\0\1\3\2\4\2\0\2\27"+ - "\1\30\1\u0121\32\10\1\36\12\163\1\0\1\33\1\46"+ - "\1\33\1\0\2\47\1\34\3\33\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\3\0\1\3\2\4\2\0"+ - "\2\27\1\30\1\u0121\32\10\1\36\2\u0122\1\163\2\u0122"+ - "\2\163\2\u0122\1\163\1\0\1\33\1\46\1\33\1\0"+ - "\2\47\1\34\3\33\2\0\1\27\1\33\4\0\2\33"+ - "\2\0\1\4\14\0\4\u015f\2\0\1\u015f\15\0\1\u015f"+ - "\6\0\12\u015f\1\356\43\0\4\u0160\2\0\1\u0160\15\0"+ - "\1\u0160\6\0\12\u0160\1\u0161\43\0\4\u0162\2\0\1\u0162"+ - "\15\0\1\u0162\6\0\1\u0163\2\u0164\1\u0163\4\u0164\1\u0165"+ - "\1\u0164\14\0\1\264\27\0\4\u0166\2\0\1\u0166\15\0"+ - "\1\u0166\6\0\12\u0166\1\u0128\13\0\1\264\27\0\4\u0162"+ - "\2\0\1\u0162\15\0\1\u0162\6\0\1\u0163\2\u0164\1\u0163"+ - "\4\u0164\1\u0165\1\u0164\43\0\1\365\4\u0166\2\0\1\u0166"+ - "\15\0\1\u0166\6\0\12\u0167\1\u0128\13\0\1\264\26\0"+ - "\1\365\4\u0166\2\0\1\u0166\15\0\1\u0166\6\0\12\u0166"+ - "\1\u0128\13\0\1\264\26\0\1\365\4\u0166\2\0\1\u0166"+ - "\15\0\1\u0166\6\0\2\u0167\1\u0166\2\u0167\2\u0166\2\u0167"+ - "\1\u0166\1\u0128\13\0\1\264\27\0\4\u0168\2\0\1\u0168"+ - "\15\0\1\u0168\6\0\12\u0168\1\364\13\0\1\264\26\0"+ - "\1\u0169\33\0\12\u012e\43\0\1\u0169\33\0\12\u016a\43\0"+ - "\1\u0169\33\0\1\u012e\1\u016b\1\u016a\2\u012e\2\u016a\2\u012e"+ - "\1\u016a\43\0\1\365\4\u0168\2\0\1\u0168\15\0\1\u0168"+ - "\6\0\12\u0168\1\364\13\0\1\264\27\0\4\u016c\2\0"+ - "\1\u016c\15\0\1\u016c\6\0\12\u016c\44\0\4\u016d\2\0"+ - "\1\u016d\15\0\1\u016d\6\0\12\u016d\44\0\4\u016e\2\0"+ - "\1\u016e\15\0\1\u016e\6\0\12\u016e\43\0\1\172\5\63"+ - "\1\374\24\63\1\173\12\63\43\0\1\172\15\63\1\374"+ - 
"\14\63\1\173\12\63\43\0\1\172\10\63\1\374\21\63"+ - "\1\173\12\63\43\0\1\172\3\63\1\u016f\26\63\1\173"+ - "\12\63\43\0\1\172\3\63\1\374\26\63\1\173\12\63"+ - "\43\0\1\172\27\63\1\u0170\2\63\1\173\12\63\44\0"+ - "\32\63\1\u0171\12\63\43\0\1\172\16\63\1\374\13\63"+ - "\1\173\12\63\77\0\12\u0172\10\0\1\371\1\372\1\373"+ - "\30\0\1\32\4\321\2\33\1\321\15\33\1\321\6\33"+ - "\12\321\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\32\4\322\2\33\1\322"+ - "\15\33\1\322\6\33\12\322\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\32"+ - "\4\323\2\33\1\323\15\33\1\323\6\33\12\323\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\6\0\2\3\1\24\1\0\1\25\1\0\1\25"+ - "\1\26\1\35\12\10\1\176\17\10\1\36\12\37\1\25"+ - "\1\33\1\40\1\33\1\0\1\33\1\41\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\2\0\1\3\13\0\1\221"+ - "\3\117\1\u0173\26\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\4\117\1\u0174\5\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\10\117\1\u0175\12\117\1\u0176\6\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\2\117"+ - "\1\u0177\7\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\7\117\1\u0178"+ - "\22\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\7\117"+ - "\1\u0179\22\117\1\36\3\117\1\u017a\6\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\7\117\1\u017b\22\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\31\117\1\u017c\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\117\1\u017d\30\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\7\117\1\u017e\1\117\1\u017f"+ - "\20\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\22\117"+ - "\1\u0180\7\117\1\36\2\117\1\u0181\7\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\7\117\1\u0182\22\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\7\117\1\u0183\5\117\1\u0184\14\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\23\117\1\u0185"+ - "\6\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\32\117"+ - "\1\36\3\117\1\u0186\6\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\17\117\1\u0187\12\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\1\u0188\11\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\17\0"+ - "\32\u0189\1\0\12\u0189\11\0\1\u018a\1\0\1\u018b\27\0"+ - "\46\u0154\1\u0156\2\u0154\1\u0157\3\u0154\1\u0158\5\0\2\u0154"+ - "\4\0\1\u0154\17\0\1\u018c\32\u0155\1\u018d\12\u0155\1\u018e"+ - "\2\u0154\1\u0157\3\u0154\1\u0158\1\0\1\u018f\3\0\2\u0154"+ - "\4\0\1\u0154\17\0\46\u0156\1\0\2\u0156\1\u0190\3\u0156"+ - "\1\u0158\5\0\2\u0156\4\0\1\u0156\20\0\4\u0191\2\0"+ - "\1\u0191\15\0\1\u0191\6\0\12\u0191\44\0\32\u0192\1\0"+ - "\12\u0192\13\0\1\u0159\30\0\4\u0193\2\0\1\u0193\15\0"+ - "\1\u0193\6\0\12\u0193\1\u0194\42\0\1\u0195\32\u0196\1\u0195"+ - "\12\u0196\1\u0197\2\u0195\1\u0198\3\u0195\1\u0199\3\0\1\u019a"+ - "\1\0\2\u0195\4\0\1\u0195\17\0\1\221\12\117\1\226"+ - "\17\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\6\0\1\3\2\4"+ - "\2\0\2\27\1\30\1\254\32\10\1\36\12\u015d\1\341"+ - "\1\33\1\46\1\33\1\0\2\47\1\34\1\321\1\322"+ - "\1\323\2\0\1\27\1\33\4\0\2\33\2\0\1\4"+ - 
"\3\0\1\3\2\4\2\0\2\27\1\30\1\254\32\10"+ - "\1\36\12\u019b\1\341\1\33\1\46\1\33\1\0\2\47"+ - "\1\34\1\321\1\322\1\323\2\0\1\27\1\33\4\0"+ - "\2\33\2\0\1\4\3\0\1\3\2\4\2\0\2\27"+ - "\1\30\1\254\32\10\1\36\1\u015d\1\u019c\1\u019b\2\u015d"+ - "\2\u019b\2\u015d\1\u019b\1\341\1\33\1\46\1\33\1\0"+ - "\2\47\1\34\1\321\1\322\1\323\2\0\1\27\1\33"+ - "\4\0\2\33\2\0\1\4\61\0\1\356\43\0\4\u019d"+ - "\2\0\1\u019d\15\0\1\u019d\6\0\12\u019d\1\u0161\43\0"+ - "\4\u019e\2\0\1\u019e\15\0\1\u019e\6\0\12\u019e\1\u019f"+ - "\43\0\4\u01a0\2\0\1\u01a0\15\0\1\u01a0\6\0\12\u01a0"+ - "\1\u01a1\13\0\1\264\26\0\1\365\4\u01a0\2\0\1\u01a0"+ - "\15\0\1\u01a0\6\0\12\u01a2\1\u01a1\13\0\1\264\26\0"+ - "\1\365\4\u01a0\2\0\1\u01a0\15\0\1\u01a0\6\0\12\u01a3"+ - "\1\u01a1\13\0\1\264\26\0\1\365\4\u01a0\2\0\1\u01a0"+ - "\15\0\1\u01a0\6\0\1\u01a2\1\u01a4\1\u01a3\2\u01a2\2\u01a3"+ - "\2\u01a2\1\u01a3\1\u01a1\13\0\1\264\27\0\4\u01a5\2\0"+ - "\1\u01a5\15\0\1\u01a5\6\0\12\u01a5\1\u0128\13\0\1\264"+ - "\26\0\1\365\4\u01a5\2\0\1\u01a5\15\0\1\u01a5\6\0"+ - "\12\u01a5\1\u0128\13\0\1\264\74\0\1\364\13\0\1\264"+ - "\62\0\1\u01a6\2\u01a7\1\u01a6\4\u01a7\1\u01a8\1\u01a7\43\0"+ - "\1\u0169\110\0\1\u0169\33\0\2\u016a\1\0\2\u016a\2\0"+ - "\2\u016a\45\0\4\371\2\0\1\371\15\0\1\371\6\0"+ - "\12\371\44\0\4\372\2\0\1\372\15\0\1\372\6\0"+ - "\12\372\44\0\4\373\2\0\1\373\15\0\1\373\6\0"+ - "\12\373\43\0\1\172\20\63\1\u01a9\11\63\1\173\12\63"+ - "\43\0\1\172\3\63\1\u0107\26\63\1\173\12\63\44\0"+ - "\2\63\1\u01aa\2\63\1\u01ab\1\u01ac\1\u01ad\2\63\1\u01ae"+ - "\2\63\1\u01af\3\63\1\u01b0\1\u01b1\1\u01b2\1\63\1\u01b3"+ - "\1\u01b4\1\63\1\u01b5\1\u01b6\1\173\1\u01b7\2\63\1\u01b8"+ - "\1\63\1\u01b9\1\u01ba\3\63\77\0\12\u01bb\10\0\1\371"+ - "\1\372\1\373\30\0\1\221\24\117\1\u01bc\5\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\1\117\1\u01bd\30\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\14\117\1\u01be"+ - "\15\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\1\117"+ - "\1\u01bf\30\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\21\117\1\u01c0\10\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\24\117\1\u01c1\5\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\24\117\1\u01c2\5\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\344\31\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\24\117\1\u01c3\5\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\1\117\1\u01c4\30\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\31\117\1\u01c5\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\24\117\1\u01c6\5\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\117\1\u01c7"+ - "\30\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\1\u01c8"+ - "\31\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\21\117"+ - "\1\u01c9\10\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\24\117\1\u01ca\5\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\24\117\1\u01cb\5\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\4\117\1\u01cc\25\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - 
"\2\33\16\0\1\221\21\117\1\u01cd\10\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\24\117\1\u01ce\5\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\7\117"+ - "\1\u01cf\2\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\u01d0\31\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\u01d1\32\u0189\1\u01d2"+ - "\12\u0189\11\0\1\u018a\31\0\51\u018a\1\u01d3\3\0\3\u018a"+ - "\1\373\3\0\1\u018a\25\0\4\u01d4\2\0\1\u01d4\15\0"+ - "\1\u01d4\6\0\12\u01d4\1\u01d5\42\0\1\u0154\32\u0155\1\u0154"+ - "\12\u0155\1\u0156\2\u0154\1\u0157\3\u0154\1\u0158\5\0\2\u0154"+ - "\4\0\1\u0154\17\0\1\u0154\32\u0155\1\u018d\12\u0155\1\u0156"+ - "\2\u0154\1\u0157\3\u0154\1\u0158\5\0\2\u0154\4\0\1\u0154"+ - "\17\0\34\u0156\12\u01d6\1\0\2\u0156\1\u0190\3\u0156\1\u0158"+ - "\5\0\2\u0156\4\0\1\u0156\17\0\51\u018f\1\u01d7\3\0"+ - "\3\u018f\1\373\2\0\1\u01d8\1\u018f\25\0\4\u01d9\2\0"+ - "\1\u01d9\15\0\1\u01d9\6\0\12\u01d9\44\0\4\u0154\2\0"+ - "\1\u0154\15\0\1\u0154\6\0\12\u0154\43\0\1\u01da\32\u0192"+ - "\1\u01db\12\u0192\1\u01dc\10\0\1\u018f\32\0\4\u01dd\2\0"+ - "\1\u01dd\15\0\1\u01dd\6\0\12\u01dd\1\u01de\110\0\1\u01df"+ - "\42\0\46\u0195\1\u0197\2\u0195\1\u0198\3\u0195\1\u0199\5\0"+ - "\2\u0195\4\0\1\u0195\17\0\1\u01e0\32\u0196\1\u01e1\12\u0196"+ - "\1\u01e2\2\u0195\1\u0198\3\u0195\1\u0199\1\371\1\372\1\373"+ - "\2\0\2\u0195\4\0\1\u0195\17\0\46\u0197\1\0\2\u0197"+ - "\1\u01e3\3\u0197\1\u0199\5\0\2\u0197\4\0\1\u0197\20\0"+ - "\4\u01e4\2\0\1\u01e4\15\0\1\u01e4\6\0\12\u01e4\44\0"+ - "\32\u01e5\1\0\12\u01e5\13\0\1\u019a\17\0\1\3\2\4"+ - "\2\0\2\27\1\30\1\254\32\10\1\36\12\163\1\341"+ - "\1\33\1\46\1\33\1\0\2\47\1\34\1\321\1\322"+ - "\1\323\2\0\1\27\1\33\4\0\2\33\2\0\1\4"+ - "\3\0\1\3\2\4\2\0\2\27\1\30\1\254\32\10"+ - "\1\36\2\u019b\1\163\2\u019b\2\163\2\u019b\1\163\1\341"+ - "\1\33\1\46\1\33\1\0\2\47\1\34\1\321\1\322"+ - "\1\323\2\0\1\27\1\33\4\0\2\33\2\0\1\4"+ - "\14\0\4\u01e6\2\0\1\u01e6\15\0\1\u01e6\6\0\12\u01e6"+ - "\1\u0161\43\0\4\u01e7\2\0\1\u01e7\15\0\1\u01e7\6\0"+ - "\12\u01e7\1\u01e8\43\0\4\u01e9\2\0\1\u01e9\15\0\1\u01e9"+ - "\6\0\1\u01ea\2\u01eb\1\u01ea\4\u01eb\1\u01ec\1\u01eb\14\0"+ - "\1\264\27\0\4\u01ed\2\0\1\u01ed\15\0\1\u01ed\6\0"+ - "\12\u01ed\1\u01a1\13\0\1\264\27\0\4\u01e9\2\0\1\u01e9"+ - "\15\0\1\u01e9\6\0\1\u01ea\2\u01eb\1\u01ea\4\u01eb\1\u01ec"+ - "\1\u01eb\43\0\1\365\4\u01ed\2\0\1\u01ed\15\0\1\u01ed"+ - "\6\0\12\u01ee\1\u01a1\13\0\1\264\26\0\1\365\4\u01ed"+ - "\2\0\1\u01ed\15\0\1\u01ed\6\0\12\u01ed\1\u01a1\13\0"+ - "\1\264\26\0\1\365\4\u01ed\2\0\1\u01ed\15\0\1\u01ed"+ - "\6\0\2\u01ee\1\u01ed\2\u01ee\2\u01ed\2\u01ee\1\u01ed\1\u01a1"+ - "\13\0\1\264\74\0\1\u0128\13\0\1\264\26\0\1\u01ef"+ - "\33\0\12\u01a7\43\0\1\u01ef\33\0\12\u01f0\43\0\1\u01ef"+ - "\33\0\1\u01a7\1\u01f1\1\u01f0\2\u01a7\2\u01f0\2\u01a7\1\u01f0"+ - "\43\0\1\172\12\63\1\374\17\63\1\173\12\63\43\0"+ - "\1\172\3\63\1\u01f2\26\63\1\173\12\63\43\0\1\172"+ - "\32\63\1\173\4\63\1\u01f3\5\63\43\0\1\172\10\63"+ - "\1\u01f4\12\63\1\u01f5\6\63\1\173\12\63\43\0\1\172"+ - "\32\63\1\173\2\63\1\u01f6\7\63\43\0\1\172\7\63"+ - "\1\u01f7\22\63\1\173\12\63\43\0\1\172\7\63\1\u01f8"+ - "\22\63\1\173\3\63\1\u01f9\6\63\43\0\1\172\7\63"+ - "\1\u01fa\22\63\1\173\12\63\43\0\1\172\31\63\1\u01fb"+ - "\1\173\12\63\43\0\1\172\1\63\1\u01fc\30\63\1\173"+ - "\12\63\43\0\1\172\7\63\1\u01fd\1\63\1\u01fe\20\63"+ - "\1\173\12\63\43\0\1\172\22\63\1\u01ff\7\63\1\173"+ - "\2\63\1\u0200\7\63\43\0\1\172\7\63\1\u0201\22\63"+ - 
"\1\173\12\63\43\0\1\172\7\63\1\u0202\5\63\1\u0203"+ - "\14\63\1\173\12\63\43\0\1\172\23\63\1\u0204\6\63"+ - "\1\173\12\63\43\0\1\172\32\63\1\173\3\63\1\u0205"+ - "\6\63\43\0\1\172\17\63\1\u0206\12\63\1\173\12\63"+ - "\43\0\1\172\32\63\1\173\1\u0207\11\63\77\0\12\u0208"+ - "\10\0\1\371\1\372\1\373\30\0\1\221\1\u0209\31\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\21\117\1\u020a"+ - "\10\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\16\117"+ - "\1\u020b\4\117\1\u020c\6\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\10\117\1\u020d\1\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\32\117\1\36\10\117\1\u020e\1\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\1\u020f\2\117\1\u0210\26\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\16\117\1\u0211"+ - "\13\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\25\117"+ - "\1\u0212\4\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\10\117\1\u0213\1\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\23\117\1\u0214\6\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\31\117\1\u0215\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\26\117\1\u0216\3\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\11\117\1\u0217\20\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\32\117\1\36\3\117\1\u0218"+ - "\6\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\10\117\1\u0219\21\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\3\117\1\u021a"+ - "\26\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\21\117"+ - "\1\u021b\6\117\1\u021c\1\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\12\117\1\u021d\17\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\32\117\1\36\1\117\1\u021e\10\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\24\117\1\u021f\5\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\31\117\1\u0220\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\17\0\32\u0189\1\0\12\u0189\44\0"+ - "\32\u0189\1\u01d2\12\u0189\44\0\4\u0221\2\0\1\u0221\15\0"+ - "\1\u0221\6\0\12\u0221\44\0\4\u0222\2\0\1\u0222\15\0"+ - "\1\u0222\6\0\12\u0222\1\u0223\110\0\1\u0224\42\0\34\u0156"+ - "\12\u0225\1\0\2\u0156\1\u0190\3\u0156\1\u0158\1\0\1\u018f"+ - "\3\0\2\u0156\4\0\1\u0156\20\0\4\u0226\2\0\1\u0226"+ - "\15\0\1\u0226\6\0\12\u0226\63\0\1\u0227\71\0\4\u0156"+ - "\2\0\1\u0156\15\0\1\u0156\6\0\12\u0156\44\0\32\u0192"+ - "\1\0\12\u0192\44\0\32\u0192\1\u01db\12\u0192\77\0\12\u0228"+ - "\44\0\4\u0229\2\0\1\u0229\15\0\1\u0229\6\0\12\u0229"+ - "\1\u01de\43\0\4\u022a\2\0\1\u022a\15\0\1\u022a\6\0"+ - "\12\u022a\1\u022b\43\0\4\u022c\2\0\1\u022c\15\0\1\u022c"+ - "\6\0\1\u022d\2\u022e\1\u022d\4\u022e\1\u022f\1\u022e\14\0"+ - "\1\u0230\26\0\1\u0195\32\u0196\1\u0195\12\u0196\1\u0197\2\u0195"+ - "\1\u0198\3\u0195\1\u0199\5\0\2\u0195\4\0\1\u0195\17\0"+ - "\1\u0195\32\u0196\1\u01e1\12\u0196\1\u0197\2\u0195\1\u0198\3\u0195"+ - "\1\u0199\5\0\2\u0195\4\0\1\u0195\17\0\34\u0197\12\u0231"+ - "\1\0\2\u0197\1\u01e3\3\u0197\1\u0199\5\0\2\u0197\4\0"+ - 
"\1\u0197\20\0\4\u0232\2\0\1\u0232\15\0\1\u0232\6\0"+ - "\12\u0232\44\0\4\u0195\2\0\1\u0195\15\0\1\u0195\6\0"+ - "\12\u0195\43\0\1\u0233\32\u01e5\1\u0234\12\u01e5\1\341\7\0"+ - "\1\371\1\372\1\373\76\0\1\u0161\43\0\4\u0235\2\0"+ - "\1\u0235\15\0\1\u0235\6\0\12\u0235\1\u01e8\43\0\4\u0236"+ - "\2\0\1\u0236\15\0\1\u0236\6\0\12\u0236\1\u0237\43\0"+ - "\4\u0238\2\0\1\u0238\15\0\1\u0238\6\0\12\u0238\1\u0239"+ - "\13\0\1\264\26\0\1\365\4\u0238\2\0\1\u0238\15\0"+ - "\1\u0238\6\0\12\u023a\1\u0239\13\0\1\264\26\0\1\365"+ - "\4\u0238\2\0\1\u0238\15\0\1\u0238\6\0\12\u023b\1\u0239"+ - "\13\0\1\264\26\0\1\365\4\u0238\2\0\1\u0238\15\0"+ - "\1\u0238\6\0\1\u023a\1\u023c\1\u023b\2\u023a\2\u023b\2\u023a"+ - "\1\u023b\1\u0239\13\0\1\264\27\0\4\u023d\2\0\1\u023d"+ - "\15\0\1\u023d\6\0\12\u023d\1\u01a1\13\0\1\264\26\0"+ - "\1\365\4\u023d\2\0\1\u023d\15\0\1\u023d\6\0\12\u023d"+ - "\1\u01a1\13\0\1\264\62\0\1\u023e\2\u023f\1\u023e\4\u023f"+ - "\1\u0240\1\u023f\43\0\1\u01ef\110\0\1\u01ef\33\0\2\u01f0"+ - "\1\0\2\u01f0\2\0\2\u01f0\44\0\1\172\24\63\1\u0241"+ - "\5\63\1\173\12\63\43\0\1\172\1\63\1\u0242\30\63"+ - "\1\173\12\63\43\0\1\172\14\63\1\u0243\15\63\1\173"+ - "\12\63\43\0\1\172\1\63\1\u0244\30\63\1\173\12\63"+ - "\43\0\1\172\21\63\1\u0245\10\63\1\173\12\63\43\0"+ - "\1\172\24\63\1\u0246\5\63\1\173\12\63\43\0\1\172"+ - "\24\63\1\u0247\5\63\1\173\12\63\43\0\1\172\1\u0137"+ - "\31\63\1\173\12\63\43\0\1\172\24\63\1\u0248\5\63"+ - "\1\173\12\63\43\0\1\172\1\63\1\u0249\30\63\1\173"+ - "\12\63\43\0\1\172\31\63\1\u024a\1\173\12\63\43\0"+ - "\1\172\24\63\1\u024b\5\63\1\173\12\63\43\0\1\172"+ - "\1\63\1\u024c\30\63\1\173\12\63\43\0\1\172\1\u024d"+ - "\31\63\1\173\12\63\43\0\1\172\21\63\1\u024e\10\63"+ - "\1\173\12\63\43\0\1\172\24\63\1\u024f\5\63\1\173"+ - "\12\63\43\0\1\172\24\63\1\u0250\5\63\1\173\12\63"+ - "\43\0\1\172\4\63\1\u0251\25\63\1\173\12\63\43\0"+ - "\1\172\21\63\1\u0252\10\63\1\173\12\63\43\0\1\172"+ - "\24\63\1\u0253\5\63\1\173\12\63\43\0\1\172\32\63"+ - "\1\173\7\63\1\u0254\2\63\43\0\1\172\1\u0255\31\63"+ - "\1\173\12\63\121\0\1\371\1\372\1\373\30\0\1\221"+ - "\32\117\1\36\1\u0256\11\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\7\117\1\u0257\2\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\6\117\1\350\3\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\5\117\1\350\4\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\117\1\u0258\30\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\32\117\1\36\1\117\1\u0259"+ - "\10\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\1\u025a\27\117\1\u025b"+ - "\1\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\4\117"+ - "\1\u025c\25\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\1\u025d\11\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\3\117\1\u025e\6\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\1\u025f\31\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\1\u0257\31\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\2\117\1\u0260\7\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\2\117\1\u0261\7\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\15\117\1\u0262\14\117\1\36\12\117"+ - 
"\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\32\117\1\36\5\117\1\u0263"+ - "\4\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\10\117"+ - "\1\u0264\1\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\117\1\u0265"+ - "\30\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\32\117"+ - "\1\36\3\117\1\u0266\6\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\1\117\1\u0267\10\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\1\117\1\u0268\10\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\24\117\1\u0269\5\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\32\117\1\36\3\117\1\u026a\6\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\25\117\1\u026b\4\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\17\0\4\u018a\2\0\1\u018a\15\0"+ - "\1\u018a\6\0\12\u018a\44\0\4\u026c\2\0\1\u026c\15\0"+ - "\1\u026c\6\0\12\u026c\1\u0223\43\0\4\u026d\2\0\1\u026d"+ - "\15\0\1\u026d\6\0\12\u026d\1\u026e\43\0\4\u026f\2\0"+ - "\1\u026f\15\0\1\u026f\6\0\1\u0270\2\u0271\1\u0270\4\u0271"+ - "\1\u0272\1\u0271\14\0\1\u0273\26\0\34\u0156\12\u0274\1\0"+ - "\2\u0156\1\u0190\3\u0156\1\u0158\1\0\1\u018f\3\0\2\u0156"+ - "\4\0\1\u0156\20\0\4\u018f\2\0\1\u018f\15\0\1\u018f"+ - "\6\0\12\u018f\74\0\1\u0275\113\0\12\u0276\11\0\1\u018f"+ - "\32\0\4\u0277\2\0\1\u0277\15\0\1\u0277\6\0\12\u0277"+ - "\1\u01de\43\0\4\u0278\2\0\1\u0278\15\0\1\u0278\6\0"+ - "\12\u0278\1\u0279\43\0\4\u027a\2\0\1\u027a\15\0\1\u027a"+ - "\6\0\1\u027b\2\u027c\1\u027b\4\u027c\1\u027d\1\u027c\14\0"+ - "\1\u0230\27\0\4\u027e\2\0\1\u027e\15\0\1\u027e\6\0"+ - "\12\u027e\1\u027f\13\0\1\u0230\26\0\1\u0280\4\u027e\2\0"+ - "\1\u027e\15\0\1\u027e\6\0\12\u0281\1\u027f\13\0\1\u0230"+ - "\26\0\1\u0280\4\u027e\2\0\1\u027e\15\0\1\u027e\6\0"+ - "\12\u0282\1\u027f\13\0\1\u0230\26\0\1\u0280\4\u027e\2\0"+ - "\1\u027e\15\0\1\u027e\6\0\1\u0281\1\u0283\1\u0282\2\u0281"+ - "\2\u0282\2\u0281\1\u0282\1\u027f\13\0\1\u0230\74\0\1\u01dc"+ - "\10\0\1\u018f\31\0\34\u0197\12\u0284\1\0\2\u0197\1\u01e3"+ - "\3\u0197\1\u0199\1\371\1\372\1\373\2\0\2\u0197\4\0"+ - "\1\u0197\20\0\4\u0197\2\0\1\u0197\15\0\1\u0197\6\0"+ - "\12\u0197\44\0\32\u01e5\1\0\12\u01e5\44\0\32\u01e5\1\u0234"+ - "\12\u01e5\44\0\4\u0285\2\0\1\u0285\15\0\1\u0285\6\0"+ - "\12\u0285\1\u01e8\43\0\4\u0286\2\0\1\u0286\15\0\1\u0286"+ - "\6\0\12\u0286\1\u0287\43\0\4\u0288\2\0\1\u0288\15\0"+ - "\1\u0288\6\0\1\u0289\2\u028a\1\u0289\4\u028a\1\u028b\1\u028a"+ - "\14\0\1\264\27\0\4\u028c\2\0\1\u028c\15\0\1\u028c"+ - "\6\0\12\u028c\1\u0239\13\0\1\264\27\0\4\u0288\2\0"+ - "\1\u0288\15\0\1\u0288\6\0\1\u0289\2\u028a\1\u0289\4\u028a"+ - "\1\u028b\1\u028a\43\0\1\365\4\u028c\2\0\1\u028c\15\0"+ - "\1\u028c\6\0\12\u028d\1\u0239\13\0\1\264\26\0\1\365"+ - "\4\u028c\2\0\1\u028c\15\0\1\u028c\6\0\12\u028c\1\u0239"+ - "\13\0\1\264\26\0\1\365\4\u028c\2\0\1\u028c\15\0"+ - "\1\u028c\6\0\2\u028d\1\u028c\2\u028d\2\u028c\2\u028d\1\u028c"+ - "\1\u0239\13\0\1\264\74\0\1\u01a1\13\0\1\264\62\0"+ - "\12\u023f\14\0\1\264\62\0\12\u028e\14\0\1\264\62\0"+ - "\1\u023f\1\u028f\1\u028e\2\u023f\2\u028e\2\u023f\1\u028e\14\0"+ - "\1\264\26\0\1\172\1\u0290\31\63\1\173\12\63\43\0"+ - "\1\172\21\63\1\u0291\10\63\1\173\12\63\43\0\1\172"+ - "\16\63\1\u0292\4\63\1\u0293\6\63\1\173\12\63\43\0"+ - "\1\172\32\63\1\173\10\63\1\u0294\1\63\43\0\1\172"+ - "\32\63\1\173\10\63\1\u0295\1\63\43\0\1\172\1\u0296"+ - 
"\2\63\1\u0297\26\63\1\173\12\63\43\0\1\172\16\63"+ - "\1\u0298\13\63\1\173\12\63\43\0\1\172\25\63\1\u0299"+ - "\4\63\1\173\12\63\43\0\1\172\32\63\1\173\10\63"+ - "\1\u029a\1\63\43\0\1\172\23\63\1\u029b\6\63\1\173"+ - "\12\63\43\0\1\172\31\63\1\u029c\1\173\12\63\43\0"+ - "\1\172\26\63\1\u029d\3\63\1\173\12\63\43\0\1\172"+ - "\11\63\1\u029e\20\63\1\173\12\63\43\0\1\172\32\63"+ - "\1\173\3\63\1\u029f\6\63\43\0\1\172\10\63\1\u02a0"+ - "\21\63\1\173\12\63\43\0\1\172\3\63\1\u02a1\26\63"+ - "\1\173\12\63\43\0\1\172\21\63\1\u02a2\6\63\1\u02a3"+ - "\1\63\1\173\12\63\43\0\1\172\12\63\1\u02a4\17\63"+ - "\1\173\12\63\43\0\1\172\32\63\1\173\1\63\1\u02a5"+ - "\10\63\43\0\1\172\24\63\1\u02a6\5\63\1\173\12\63"+ - "\43\0\1\172\31\63\1\u02a7\1\173\12\63\43\0\1\221"+ - "\1\u02a8\31\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\25\117\1\226\4\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\5\117\1\u02a9\4\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\3\117\1\u02a8\6\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\12\117\1\u02aa\17\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\25\117\1\u02ab\4\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\15\117\1\u02ac\14\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\2\117\1\u0257"+ - "\27\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\1\117"+ - "\1\226\30\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\11\117\1\u02ad\20\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\1\u02ae\31\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\1\u02af\31\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\2\117\1\u02b0\27\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\4\117\1\235\5\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\u02b1\31\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\25\117\1\u02b2\4\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\32\117\1\36\4\117\1\u02a8"+ - "\5\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\11\117"+ - "\1\u02a8\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\2\117"+ - "\1\u02a8\7\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\16\117\1\u02b3"+ - "\13\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\32\117"+ - "\1\36\3\117\1\u02b4\6\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\24\117\1\u02b5\5\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\17\0"+ - "\4\u02b6\2\0\1\u02b6\15\0\1\u02b6\6\0\12\u02b6\1\u0223"+ - "\43\0\4\u02b7\2\0\1\u02b7\15\0\1\u02b7\6\0\12\u02b7"+ - "\1\u02b8\43\0\4\u02b9\2\0\1\u02b9\15\0\1\u02b9\6\0"+ - "\1\u02ba\2\u02bb\1\u02ba\4\u02bb\1\u02bc\1\u02bb\14\0\1\u0273"+ - "\27\0\4\u02bd\2\0\1\u02bd\15\0\1\u02bd\6\0\12\u02bd"+ - "\1\u02be\13\0\1\u0273\26\0\1\u02bf\4\u02bd\2\0\1\u02bd"+ - "\15\0\1\u02bd\6\0\12\u02c0\1\u02be\13\0\1\u0273\26\0"+ - "\1\u02bf\4\u02bd\2\0\1\u02bd\15\0\1\u02bd\6\0\12\u02c1"+ - "\1\u02be\13\0\1\u0273\26\0\1\u02bf\4\u02bd\2\0\1\u02bd"+ - 
"\15\0\1\u02bd\6\0\1\u02c0\1\u02c2\1\u02c1\2\u02c0\2\u02c1"+ - "\2\u02c0\1\u02c1\1\u02be\13\0\1\u0273\105\0\1\u018a\31\0"+ - "\34\u0156\12\u02c3\1\0\2\u0156\1\u0190\3\u0156\1\u0158\1\0"+ - "\1\u018f\3\0\2\u0156\4\0\1\u0156\35\0\1\u02c4\126\0"+ - "\12\u02c5\11\0\1\u018f\77\0\1\u01de\43\0\4\u02c6\2\0"+ - "\1\u02c6\15\0\1\u02c6\6\0\12\u02c6\1\u0279\43\0\4\u02c7"+ - "\2\0\1\u02c7\15\0\1\u02c7\6\0\12\u02c7\1\u02c8\43\0"+ - "\4\u02c9\2\0\1\u02c9\15\0\1\u02c9\6\0\12\u02c9\1\u02ca"+ - "\13\0\1\u0230\26\0\1\u0280\4\u02c9\2\0\1\u02c9\15\0"+ - "\1\u02c9\6\0\12\u02cb\1\u02ca\13\0\1\u0230\26\0\1\u0280"+ - "\4\u02c9\2\0\1\u02c9\15\0\1\u02c9\6\0\12\u02cc\1\u02ca"+ - "\13\0\1\u0230\26\0\1\u0280\4\u02c9\2\0\1\u02c9\15\0"+ - "\1\u02c9\6\0\1\u02cb\1\u02cd\1\u02cc\2\u02cb\2\u02cc\2\u02cb"+ - "\1\u02cc\1\u02ca\13\0\1\u0230\27\0\4\u02ce\2\0\1\u02ce"+ - "\15\0\1\u02ce\6\0\12\u02ce\1\u027f\13\0\1\u0230\27\0"+ - "\4\u027a\2\0\1\u027a\15\0\1\u027a\6\0\1\u027b\2\u027c"+ - "\1\u027b\4\u027c\1\u027d\1\u027c\77\0\1\u02cf\2\u02d0\1\u02cf"+ - "\4\u02d0\1\u02d1\1\u02d0\43\0\1\u0280\4\u02ce\2\0\1\u02ce"+ - "\15\0\1\u02ce\6\0\12\u02d2\1\u027f\13\0\1\u0230\26\0"+ - "\1\u0280\4\u02ce\2\0\1\u02ce\15\0\1\u02ce\6\0\12\u02ce"+ - "\1\u027f\13\0\1\u0230\26\0\1\u0280\4\u02ce\2\0\1\u02ce"+ - "\15\0\1\u02ce\6\0\2\u02d2\1\u02ce\2\u02d2\2\u02ce\2\u02d2"+ - "\1\u02ce\1\u027f\13\0\1\u0230\26\0\34\u0197\12\u02d3\1\0"+ - "\2\u0197\1\u01e3\3\u0197\1\u0199\1\371\1\372\1\373\2\0"+ - "\2\u0197\4\0\1\u0197\65\0\1\u01e8\43\0\4\u02d4\2\0"+ - "\1\u02d4\15\0\1\u02d4\6\0\12\u02d4\1\u0287\43\0\4\u02d5"+ - "\2\0\1\u02d5\15\0\1\u02d5\6\0\12\u02d5\1\u02d6\43\0"+ - "\4\u02d7\2\0\1\u02d7\15\0\1\u02d7\6\0\12\u02d7\1\u02d8"+ - "\13\0\1\264\26\0\1\365\4\u02d7\2\0\1\u02d7\15\0"+ - "\1\u02d7\6\0\12\u02d9\1\u02d8\13\0\1\264\26\0\1\365"+ - "\4\u02d7\2\0\1\u02d7\15\0\1\u02d7\6\0\12\u02da\1\u02d8"+ - "\13\0\1\264\26\0\1\365\4\u02d7\2\0\1\u02d7\15\0"+ - "\1\u02d7\6\0\1\u02d9\1\u02db\1\u02da\2\u02d9\2\u02da\2\u02d9"+ - "\1\u02da\1\u02d8\13\0\1\264\27\0\4\u02dc\2\0\1\u02dc"+ - "\15\0\1\u02dc\6\0\12\u02dc\1\u0239\13\0\1\264\26\0"+ - "\1\365\4\u02dc\2\0\1\u02dc\15\0\1\u02dc\6\0\12\u02dc"+ - "\1\u0239\13\0\1\264\110\0\1\264\62\0\2\u028e\1\0"+ - "\2\u028e\2\0\2\u028e\15\0\1\264\26\0\1\172\32\63"+ - "\1\173\1\u02dd\11\63\43\0\1\172\32\63\1\173\7\63"+ - "\1\u02de\2\63\43\0\1\172\32\63\1\173\6\63\1\u013c"+ - "\3\63\43\0\1\172\32\63\1\173\5\63\1\u013c\4\63"+ - "\43\0\1\172\1\63\1\u02df\30\63\1\173\12\63\43\0"+ - "\1\172\32\63\1\173\1\63\1\u02e0\10\63\43\0\1\172"+ - "\1\u02e1\27\63\1\u02e2\1\63\1\173\12\63\43\0\1\172"+ - "\4\63\1\u02e3\25\63\1\173\12\63\43\0\1\172\32\63"+ - "\1\173\1\u02e4\11\63\43\0\1\172\32\63\1\173\3\63"+ - "\1\u02e5\6\63\43\0\1\172\1\u02e6\31\63\1\173\12\63"+ - "\43\0\1\172\1\u02de\31\63\1\173\12\63\43\0\1\172"+ - "\32\63\1\173\2\63\1\u02e7\7\63\43\0\1\172\32\63"+ - "\1\173\2\63\1\u02e8\7\63\43\0\1\172\15\63\1\u02e9"+ - "\14\63\1\173\12\63\43\0\1\172\32\63\1\173\5\63"+ - "\1\u02ea\4\63\43\0\1\172\32\63\1\173\10\63\1\u02eb"+ - "\1\63\43\0\1\172\1\63\1\u02ec\30\63\1\173\12\63"+ - "\43\0\1\172\32\63\1\173\3\63\1\u02ed\6\63\43\0"+ - "\1\172\32\63\1\173\1\63\1\u02ee\10\63\43\0\1\172"+ - "\32\63\1\173\1\63\1\u02ef\10\63\43\0\1\172\24\63"+ - "\1\u02f0\5\63\1\173\12\63\43\0\1\172\32\63\1\173"+ - "\3\63\1\u02f1\6\63\43\0\1\172\25\63\1\u02f2\4\63"+ - "\1\173\12\63\43\0\1\221\2\117\1\226\27\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\3\117\1\u02f3\26\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - 
"\3\0\1\33\4\0\2\33\16\0\1\221\32\117\1\36"+ - "\11\117\1\u02f4\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\32\117\1\36"+ - "\11\117\1\u02f5\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\32\117\1\36"+ - "\7\117\1\u02f6\2\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\32\117"+ - "\1\36\4\117\1\u02f7\5\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\26\117\1\u02f8\3\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\30\117\1\u02f9\1\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\11\117\1\343\20\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\12\117\1\u02fa\17\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\17\117\1\236\12\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\4\117"+ - "\1\u02fb\5\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\30\117\1\u02fc"+ - "\1\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\30\117"+ - "\1\u02fd\1\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\64\0\1\u0223"+ - "\43\0\4\u02fe\2\0\1\u02fe\15\0\1\u02fe\6\0\12\u02fe"+ - "\1\u02b8\43\0\4\u02ff\2\0\1\u02ff\15\0\1\u02ff\6\0"+ - "\12\u02ff\1\u0300\43\0\4\u0301\2\0\1\u0301\15\0\1\u0301"+ - "\6\0\12\u0301\1\u0302\13\0\1\u0273\26\0\1\u02bf\4\u0301"+ - "\2\0\1\u0301\15\0\1\u0301\6\0\12\u0303\1\u0302\13\0"+ - "\1\u0273\26\0\1\u02bf\4\u0301\2\0\1\u0301\15\0\1\u0301"+ - "\6\0\12\u0304\1\u0302\13\0\1\u0273\26\0\1\u02bf\4\u0301"+ - "\2\0\1\u0301\15\0\1\u0301\6\0\1\u0303\1\u0305\1\u0304"+ - "\2\u0303\2\u0304\2\u0303\1\u0304\1\u0302\13\0\1\u0273\27\0"+ - "\4\u0306\2\0\1\u0306\15\0\1\u0306\6\0\12\u0306\1\u02be"+ - "\13\0\1\u0273\27\0\4\u02b9\2\0\1\u02b9\15\0\1\u02b9"+ - "\6\0\1\u02ba\2\u02bb\1\u02ba\4\u02bb\1\u02bc\1\u02bb\77\0"+ - "\1\u0307\2\u0308\1\u0307\4\u0308\1\u0309\1\u0308\43\0\1\u02bf"+ - "\4\u0306\2\0\1\u0306\15\0\1\u0306\6\0\12\u030a\1\u02be"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u0306\2\0\1\u0306\15\0"+ - "\1\u0306\6\0\12\u0306\1\u02be\13\0\1\u0273\26\0\1\u02bf"+ - "\4\u0306\2\0\1\u0306\15\0\1\u0306\6\0\2\u030a\1\u0306"+ - "\2\u030a\2\u0306\2\u030a\1\u0306\1\u02be\13\0\1\u0273\26\0"+ - "\34\u0156\12\u030b\1\0\2\u0156\1\u0190\3\u0156\1\u0158\1\0"+ - "\1\u018f\3\0\2\u0156\4\0\1\u0156\23\0\1\u030c\140\0"+ - "\12\u030d\11\0\1\u018f\32\0\4\u030e\2\0\1\u030e\15\0"+ - "\1\u030e\6\0\12\u030e\1\u0279\43\0\4\u030f\2\0\1\u030f"+ - "\15\0\1\u030f\6\0\12\u030f\1\u0310\43\0\4\u0311\2\0"+ - "\1\u0311\15\0\1\u0311\6\0\1\u0312\2\u0313\1\u0312\4\u0313"+ - "\1\u0314\1\u0313\14\0\1\u0230\27\0\4\u0315\2\0\1\u0315"+ - "\15\0\1\u0315\6\0\12\u0315\1\u02ca\13\0\1\u0230\27\0"+ - "\4\u0311\2\0\1\u0311\15\0\1\u0311\6\0\1\u0312\2\u0313"+ - "\1\u0312\4\u0313\1\u0314\1\u0313\43\0\1\u0280\4\u0315\2\0"+ - "\1\u0315\15\0\1\u0315\6\0\12\u0316\1\u02ca\13\0\1\u0230"+ - "\26\0\1\u0280\4\u0315\2\0\1\u0315\15\0\1\u0315\6\0"+ - "\12\u0315\1\u02ca\13\0\1\u0230\26\0\1\u0280\4\u0315\2\0"+ - "\1\u0315\15\0\1\u0315\6\0\2\u0316\1\u0315\2\u0316\2\u0315"+ - "\2\u0316\1\u0315\1\u02ca\13\0\1\u0230\27\0\4\u0317\2\0"+ - "\1\u0317\15\0\1\u0317\6\0\12\u0317\1\u027f\13\0\1\u0230"+ - "\26\0\1\u0318\33\0\12\u02d0\43\0\1\u0318\33\0\12\u0319"+ - "\43\0\1\u0318\33\0\1\u02d0\1\u031a\1\u0319\2\u02d0\2\u0319"+ - "\2\u02d0\1\u0319\43\0\1\u0280\4\u0317\2\0\1\u0317\15\0"+ - "\1\u0317\6\0\12\u0317\1\u027f\13\0\1\u0230\26\0\34\u0197"+ - 
"\12\u031b\1\0\2\u0197\1\u01e3\3\u0197\1\u0199\1\371\1\372"+ - "\1\373\2\0\2\u0197\4\0\1\u0197\20\0\4\u031c\2\0"+ - "\1\u031c\15\0\1\u031c\6\0\12\u031c\1\u0287\43\0\4\u031d"+ - "\2\0\1\u031d\15\0\1\u031d\6\0\12\u031d\1\u031e\43\0"+ - "\4\u031f\2\0\1\u031f\15\0\1\u031f\6\0\1\u0320\2\u0321"+ - "\1\u0320\4\u0321\1\u0322\1\u0321\14\0\1\264\27\0\4\u0323"+ - "\2\0\1\u0323\15\0\1\u0323\6\0\12\u0323\1\u02d8\13\0"+ - "\1\264\27\0\4\u031f\2\0\1\u031f\15\0\1\u031f\6\0"+ - "\1\u0320\2\u0321\1\u0320\4\u0321\1\u0322\1\u0321\43\0\1\365"+ - "\4\u0323\2\0\1\u0323\15\0\1\u0323\6\0\12\u0324\1\u02d8"+ - "\13\0\1\264\26\0\1\365\4\u0323\2\0\1\u0323\15\0"+ - "\1\u0323\6\0\12\u0323\1\u02d8\13\0\1\264\26\0\1\365"+ - "\4\u0323\2\0\1\u0323\15\0\1\u0323\6\0\2\u0324\1\u0323"+ - "\2\u0324\2\u0323\2\u0324\1\u0323\1\u02d8\13\0\1\264\74\0"+ - "\1\u0239\13\0\1\264\26\0\1\172\1\u0325\31\63\1\173"+ - "\12\63\43\0\1\172\25\63\1\374\4\63\1\173\12\63"+ - "\43\0\1\172\32\63\1\173\5\63\1\u0326\4\63\43\0"+ - "\1\172\32\63\1\173\3\63\1\u0325\6\63\43\0\1\172"+ - "\12\63\1\u0327\17\63\1\173\12\63\43\0\1\172\25\63"+ - "\1\u0328\4\63\1\173\12\63\43\0\1\172\15\63\1\u0329"+ - "\14\63\1\173\12\63\43\0\1\172\2\63\1\u02de\27\63"+ - "\1\173\12\63\43\0\1\172\1\63\1\374\30\63\1\173"+ - "\12\63\43\0\1\172\11\63\1\u032a\20\63\1\173\12\63"+ - "\43\0\1\172\1\u032b\31\63\1\173\12\63\43\0\1\172"+ - "\1\u032c\31\63\1\173\12\63\43\0\1\172\2\63\1\u032d"+ - "\27\63\1\173\12\63\43\0\1\172\32\63\1\173\4\63"+ - "\1\u0103\5\63\43\0\1\172\1\u032e\31\63\1\173\12\63"+ - "\43\0\1\172\25\63\1\u032f\4\63\1\173\12\63\43\0"+ - "\1\172\32\63\1\173\4\63\1\u0325\5\63\43\0\1\172"+ - "\32\63\1\173\11\63\1\u0325\43\0\1\172\32\63\1\173"+ - "\2\63\1\u0325\7\63\43\0\1\172\16\63\1\u0330\13\63"+ - "\1\173\12\63\43\0\1\172\32\63\1\173\3\63\1\u0331"+ - "\6\63\43\0\1\172\24\63\1\u0332\5\63\1\173\12\63"+ - "\43\0\1\221\32\117\1\36\10\117\1\u025e\1\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\1\u0333\31\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\7\117\1\u0334\22\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\1\u0335\31\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\25\117\1\u0336\4\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\11\117"+ - "\1\u0337\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\1\u0338\31\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\12\117\1\u0339\17\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\u033a\31\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\10\117\1\u033b"+ - "\21\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\31\117"+ - "\1\u033c\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\17\0\4\u033d\2\0"+ - "\1\u033d\15\0\1\u033d\6\0\12\u033d\1\u02b8\43\0\4\u033e"+ - "\2\0\1\u033e\15\0\1\u033e\6\0\12\u033e\1\u033f\43\0"+ - "\4\u0340\2\0\1\u0340\15\0\1\u0340\6\0\1\u0341\2\u0342"+ - "\1\u0341\4\u0342\1\u0343\1\u0342\14\0\1\u0273\27\0\4\u0344"+ - "\2\0\1\u0344\15\0\1\u0344\6\0\12\u0344\1\u0302\13\0"+ - "\1\u0273\27\0\4\u0340\2\0\1\u0340\15\0\1\u0340\6\0"+ - "\1\u0341\2\u0342\1\u0341\4\u0342\1\u0343\1\u0342\43\0\1\u02bf"+ - "\4\u0344\2\0\1\u0344\15\0\1\u0344\6\0\12\u0345\1\u0302"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u0344\2\0\1\u0344\15\0"+ - 
"\1\u0344\6\0\12\u0344\1\u0302\13\0\1\u0273\26\0\1\u02bf"+ - "\4\u0344\2\0\1\u0344\15\0\1\u0344\6\0\2\u0345\1\u0344"+ - "\2\u0345\2\u0344\2\u0345\1\u0344\1\u0302\13\0\1\u0273\27\0"+ - "\4\u0346\2\0\1\u0346\15\0\1\u0346\6\0\12\u0346\1\u02be"+ - "\13\0\1\u0273\26\0\1\u0347\33\0\12\u0308\43\0\1\u0347"+ - "\33\0\12\u0348\43\0\1\u0347\33\0\1\u0308\1\u0349\1\u0348"+ - "\2\u0308\2\u0348\2\u0308\1\u0348\43\0\1\u02bf\4\u0346\2\0"+ - "\1\u0346\15\0\1\u0346\6\0\12\u0346\1\u02be\13\0\1\u0273"+ - "\26\0\46\u0156\1\0\2\u0156\1\u0190\3\u0156\1\u0158\1\0"+ - "\1\u018f\3\0\2\u0156\4\0\1\u0156\103\0\1\u034a\60\0"+ - "\12\u034b\11\0\1\u018f\77\0\1\u0279\43\0\4\u034c\2\0"+ - "\1\u034c\15\0\1\u034c\6\0\12\u034c\1\u0310\43\0\4\u034d"+ - "\2\0\1\u034d\15\0\1\u034d\6\0\12\u034d\1\u034e\43\0"+ - "\4\u034f\2\0\1\u034f\15\0\1\u034f\6\0\12\u034f\1\u0350"+ - "\13\0\1\u0230\26\0\1\u0280\4\u034f\2\0\1\u034f\15\0"+ - "\1\u034f\6\0\12\u0351\1\u0350\13\0\1\u0230\26\0\1\u0280"+ - "\4\u034f\2\0\1\u034f\15\0\1\u034f\6\0\12\u0352\1\u0350"+ - "\13\0\1\u0230\26\0\1\u0280\4\u034f\2\0\1\u034f\15\0"+ - "\1\u034f\6\0\1\u0351\1\u0353\1\u0352\2\u0351\2\u0352\2\u0351"+ - "\1\u0352\1\u0350\13\0\1\u0230\27\0\4\u0354\2\0\1\u0354"+ - "\15\0\1\u0354\6\0\12\u0354\1\u02ca\13\0\1\u0230\26\0"+ - "\1\u0280\4\u0354\2\0\1\u0354\15\0\1\u0354\6\0\12\u0354"+ - "\1\u02ca\13\0\1\u0230\74\0\1\u027f\13\0\1\u0230\62\0"+ - "\1\u0355\2\u0356\1\u0355\4\u0356\1\u0357\1\u0356\43\0\1\u0318"+ - "\110\0\1\u0318\33\0\2\u0319\1\0\2\u0319\2\0\2\u0319"+ - "\44\0\34\u0197\12\u0358\1\0\2\u0197\1\u01e3\3\u0197\1\u0199"+ - "\1\371\1\372\1\373\2\0\2\u0197\4\0\1\u0197\65\0"+ - "\1\u0287\43\0\4\u0359\2\0\1\u0359\15\0\1\u0359\6\0"+ - "\12\u0359\1\u031e\43\0\4\u035a\2\0\1\u035a\15\0\1\u035a"+ - "\6\0\1\u035b\2\u035c\1\u035b\4\u035c\1\u035d\1\u035c\1\u035e"+ - "\43\0\4\u035f\2\0\1\u035f\15\0\1\u035f\6\0\12\u035f"+ - "\1\u0360\13\0\1\264\26\0\1\365\4\u035f\2\0\1\u035f"+ - "\15\0\1\u035f\6\0\12\u0361\1\u0360\13\0\1\264\26\0"+ - "\1\365\4\u035f\2\0\1\u035f\15\0\1\u035f\6\0\12\u0362"+ - "\1\u0360\13\0\1\264\26\0\1\365\4\u035f\2\0\1\u035f"+ - "\15\0\1\u035f\6\0\1\u0361\1\u0363\1\u0362\2\u0361\2\u0362"+ - "\2\u0361\1\u0362\1\u0360\13\0\1\264\27\0\4\u0364\2\0"+ - "\1\u0364\15\0\1\u0364\6\0\12\u0364\1\u02d8\13\0\1\264"+ - "\26\0\1\365\4\u0364\2\0\1\u0364\15\0\1\u0364\6\0"+ - "\12\u0364\1\u02d8\13\0\1\264\26\0\1\172\2\63\1\374"+ - "\27\63\1\173\12\63\43\0\1\172\3\63\1\u0365\26\63"+ - "\1\173\12\63\43\0\1\172\32\63\1\173\11\63\1\u0366"+ - "\43\0\1\172\32\63\1\173\11\63\1\u0367\43\0\1\172"+ - "\32\63\1\173\7\63\1\u0368\2\63\43\0\1\172\32\63"+ - "\1\173\4\63\1\u0369\5\63\43\0\1\172\26\63\1\u036a"+ - "\3\63\1\173\12\63\43\0\1\172\30\63\1\u036b\1\63"+ - "\1\173\12\63\43\0\1\172\11\63\1\u0136\20\63\1\173"+ - "\12\63\43\0\1\172\12\63\1\u036c\17\63\1\173\12\63"+ - "\43\0\1\172\17\63\1\u0104\12\63\1\173\12\63\43\0"+ - "\1\172\32\63\1\173\4\63\1\u036d\5\63\43\0\1\172"+ - "\30\63\1\u036e\1\63\1\173\12\63\43\0\1\172\30\63"+ - "\1\u036f\1\63\1\173\12\63\43\0\1\221\32\117\1\36"+ - "\6\117\1\u0257\3\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\15\117"+ - "\1\142\14\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\16\0\1\221"+ - "\32\117\1\36\1\117\1\u0370\10\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\30\117\1\u0371\1\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\6\117\1\u0372\23\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - 
"\2\33\16\0\1\221\32\117\1\36\5\117\1\u0373\4\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\22\117\1\226\7\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\5\117"+ - "\1\u0374\4\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\32\117\1\36"+ - "\1\117\1\142\10\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\13\117"+ - "\1\u0375\16\117\1\36\12\117\1\0\3\33\1\0\2\33"+ - "\1\34\3\33\3\0\1\33\4\0\2\33\64\0\1\u02b8"+ - "\43\0\4\u0376\2\0\1\u0376\15\0\1\u0376\6\0\12\u0376"+ - "\1\u033f\43\0\4\u0377\2\0\1\u0377\15\0\1\u0377\6\0"+ - "\12\u0377\1\u0378\43\0\4\u0379\2\0\1\u0379\15\0\1\u0379"+ - "\6\0\12\u0379\1\u037a\13\0\1\u0273\26\0\1\u02bf\4\u0379"+ - "\2\0\1\u0379\15\0\1\u0379\6\0\12\u037b\1\u037a\13\0"+ - "\1\u0273\26\0\1\u02bf\4\u0379\2\0\1\u0379\15\0\1\u0379"+ - "\6\0\12\u037c\1\u037a\13\0\1\u0273\26\0\1\u02bf\4\u0379"+ - "\2\0\1\u0379\15\0\1\u0379\6\0\1\u037b\1\u037d\1\u037c"+ - "\2\u037b\2\u037c\2\u037b\1\u037c\1\u037a\13\0\1\u0273\27\0"+ - "\4\u037e\2\0\1\u037e\15\0\1\u037e\6\0\12\u037e\1\u0302"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u037e\2\0\1\u037e\15\0"+ - "\1\u037e\6\0\12\u037e\1\u0302\13\0\1\u0273\74\0\1\u02be"+ - "\13\0\1\u0273\62\0\1\u037f\2\u0380\1\u037f\4\u0380\1\u0381"+ - "\1\u0380\43\0\1\u0347\110\0\1\u0347\33\0\2\u0348\1\0"+ - "\2\u0348\2\0\2\u0348\45\0\1\u0382\1\0\1\u0382\5\0"+ - "\1\u0382\156\0\1\u018f\32\0\4\u0383\2\0\1\u0383\15\0"+ - "\1\u0383\6\0\12\u0383\1\u0310\43\0\4\u0384\2\0\1\u0384"+ - "\15\0\1\u0384\6\0\12\u0384\1\u0385\43\0\4\u0386\2\0"+ - "\1\u0386\15\0\1\u0386\6\0\1\u0387\2\u0388\1\u0387\4\u0388"+ - "\1\u0389\1\u0388\14\0\1\u0230\27\0\4\u038a\2\0\1\u038a"+ - "\15\0\1\u038a\6\0\12\u038a\1\u0350\13\0\1\u0230\27\0"+ - "\4\u0386\2\0\1\u0386\15\0\1\u0386\6\0\1\u0387\2\u0388"+ - "\1\u0387\4\u0388\1\u0389\1\u0388\43\0\1\u0280\4\u038a\2\0"+ - "\1\u038a\15\0\1\u038a\6\0\12\u038b\1\u0350\13\0\1\u0230"+ - "\26\0\1\u0280\4\u038a\2\0\1\u038a\15\0\1\u038a\6\0"+ - "\12\u038a\1\u0350\13\0\1\u0230\26\0\1\u0280\4\u038a\2\0"+ - "\1\u038a\15\0\1\u038a\6\0\2\u038b\1\u038a\2\u038b\2\u038a"+ - "\2\u038b\1\u038a\1\u0350\13\0\1\u0230\74\0\1\u02ca\13\0"+ - "\1\u0230\26\0\1\u038c\33\0\12\u0356\43\0\1\u038c\33\0"+ - "\12\u038d\43\0\1\u038c\33\0\1\u0356\1\u038e\1\u038d\2\u0356"+ - "\2\u038d\2\u0356\1\u038d\43\0\46\u0197\1\0\2\u0197\1\u01e3"+ - "\3\u0197\1\u0199\1\371\1\372\1\373\2\0\2\u0197\4\0"+ - "\1\u0197\20\0\4\u038f\2\0\1\u038f\15\0\1\u038f\6\0"+ - "\12\u038f\1\u031e\43\0\4\u0390\2\0\1\u0390\15\0\1\u0390"+ - "\6\0\12\u0390\1\u0391\42\0\1\365\4\u0390\2\0\1\u0390"+ - "\15\0\1\u0390\6\0\12\u0392\1\u0391\42\0\1\365\4\u0390"+ - "\2\0\1\u0390\15\0\1\u0390\6\0\12\u0393\1\u0391\42\0"+ - "\1\365\4\u0390\2\0\1\u0390\15\0\1\u0390\6\0\1\u0392"+ - "\1\u0394\1\u0393\2\u0392\2\u0393\2\u0392\1\u0393\1\u0391\43\0"+ - "\4\u0395\2\0\1\u0395\15\0\1\u0395\6\0\12\u0395\14\0"+ - "\1\264\27\0\4\u0396\2\0\1\u0396\15\0\1\u0396\6\0"+ - "\12\u0396\1\u0360\13\0\1\264\27\0\4\u0395\2\0\1\u0395"+ - "\15\0\1\u0395\6\0\12\u0395\43\0\1\365\4\u0396\2\0"+ - "\1\u0396\15\0\1\u0396\6\0\12\u0397\1\u0360\13\0\1\264"+ - "\26\0\1\365\4\u0396\2\0\1\u0396\15\0\1\u0396\6\0"+ - "\12\u0396\1\u0360\13\0\1\264\26\0\1\365\4\u0396\2\0"+ - "\1\u0396\15\0\1\u0396\6\0\2\u0397\1\u0396\2\u0397\2\u0396"+ - "\2\u0397\1\u0396\1\u0360\13\0\1\264\74\0\1\u02d8\13\0"+ - "\1\264\26\0\1\172\32\63\1\173\10\63\1\u02e5\1\63"+ - "\43\0\1\172\1\u0398\31\63\1\173\12\63\43\0\1\172"+ - "\7\63\1\u0399\22\63\1\173\12\63\43\0\1\172\1\u039a"+ - 
"\31\63\1\173\12\63\43\0\1\172\25\63\1\u039b\4\63"+ - "\1\173\12\63\43\0\1\172\32\63\1\173\11\63\1\u039c"+ - "\43\0\1\172\1\u039d\31\63\1\173\12\63\43\0\1\172"+ - "\12\63\1\u039e\17\63\1\173\12\63\43\0\1\172\1\u039f"+ - "\31\63\1\173\12\63\43\0\1\172\10\63\1\u03a0\21\63"+ - "\1\173\12\63\43\0\1\172\31\63\1\u03a1\1\173\12\63"+ - "\43\0\1\221\2\117\1\u03a2\27\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\3\117\1\u03a3\26\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\32\117\1\36\1\117\1\u03a4"+ - "\10\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\3\117\1\u03a5\26\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\1\u03a6\31\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\26\117\1\u03a7"+ - "\3\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\17\0\4\u03a8\2\0"+ - "\1\u03a8\15\0\1\u03a8\6\0\12\u03a8\1\u033f\43\0\4\u03a9"+ - "\2\0\1\u03a9\15\0\1\u03a9\6\0\12\u03a9\1\u03aa\43\0"+ - "\4\u03ab\2\0\1\u03ab\15\0\1\u03ab\6\0\1\u03ac\2\u03ad"+ - "\1\u03ac\4\u03ad\1\u03ae\1\u03ad\14\0\1\u0273\27\0\4\u03af"+ - "\2\0\1\u03af\15\0\1\u03af\6\0\12\u03af\1\u037a\13\0"+ - "\1\u0273\27\0\4\u03ab\2\0\1\u03ab\15\0\1\u03ab\6\0"+ - "\1\u03ac\2\u03ad\1\u03ac\4\u03ad\1\u03ae\1\u03ad\43\0\1\u02bf"+ - "\4\u03af\2\0\1\u03af\15\0\1\u03af\6\0\12\u03b0\1\u037a"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u03af\2\0\1\u03af\15\0"+ - "\1\u03af\6\0\12\u03af\1\u037a\13\0\1\u0273\26\0\1\u02bf"+ - "\4\u03af\2\0\1\u03af\15\0\1\u03af\6\0\2\u03b0\1\u03af"+ - "\2\u03b0\2\u03af\2\u03b0\1\u03af\1\u037a\13\0\1\u0273\74\0"+ - "\1\u0302\13\0\1\u0273\26\0\1\u03b1\33\0\12\u0380\43\0"+ - "\1\u03b1\33\0\12\u03b2\43\0\1\u03b1\33\0\1\u0380\1\u03b3"+ - "\1\u03b2\2\u0380\2\u03b2\2\u0380\1\u03b2\123\0\1\373\76\0"+ - "\1\u0310\43\0\4\u03b4\2\0\1\u03b4\15\0\1\u03b4\6\0"+ - "\12\u03b4\1\u0385\43\0\4\u03b5\2\0\1\u03b5\15\0\1\u03b5"+ - "\6\0\12\u03b5\1\u03b6\43\0\4\u03b7\2\0\1\u03b7\15\0"+ - "\1\u03b7\6\0\12\u03b7\1\u03b8\13\0\1\u0230\26\0\1\u0280"+ - "\4\u03b7\2\0\1\u03b7\15\0\1\u03b7\6\0\12\u03b9\1\u03b8"+ - "\13\0\1\u0230\26\0\1\u0280\4\u03b7\2\0\1\u03b7\15\0"+ - "\1\u03b7\6\0\12\u03ba\1\u03b8\13\0\1\u0230\26\0\1\u0280"+ - "\4\u03b7\2\0\1\u03b7\15\0\1\u03b7\6\0\1\u03b9\1\u03bb"+ - "\1\u03ba\2\u03b9\2\u03ba\2\u03b9\1\u03ba\1\u03b8\13\0\1\u0230"+ - "\27\0\4\u03bc\2\0\1\u03bc\15\0\1\u03bc\6\0\12\u03bc"+ - "\1\u0350\13\0\1\u0230\26\0\1\u0280\4\u03bc\2\0\1\u03bc"+ - "\15\0\1\u03bc\6\0\12\u03bc\1\u0350\13\0\1\u0230\62\0"+ - "\1\u03bd\2\u03be\1\u03bd\4\u03be\1\u03bf\1\u03be\43\0\1\u038c"+ - "\110\0\1\u038c\33\0\2\u038d\1\0\2\u038d\2\0\2\u038d"+ - "\112\0\1\u031e\43\0\4\u03c0\2\0\1\u03c0\15\0\1\u03c0"+ - "\6\0\12\u03c0\1\u0391\43\0\4\u0395\2\0\1\u0395\15\0"+ - "\1\u0395\6\0\12\u0395\1\u028e\42\0\1\365\4\u03c0\2\0"+ - "\1\u03c0\15\0\1\u03c0\6\0\12\u03c1\1\u0391\42\0\1\365"+ - "\4\u03c0\2\0\1\u03c0\15\0\1\u03c0\6\0\12\u03c0\1\u0391"+ - "\42\0\1\365\4\u03c0\2\0\1\u03c0\15\0\1\u03c0\6\0"+ - "\2\u03c1\1\u03c0\2\u03c1\2\u03c0\2\u03c1\1\u03c0\1\u0391\43\0"+ - "\4\u03c2\2\0\1\u03c2\15\0\1\u03c2\6\0\12\u03c2\14\0"+ - "\1\264\27\0\4\u03c3\2\0\1\u03c3\15\0\1\u03c3\6\0"+ - "\12\u03c3\1\u0360\13\0\1\264\26\0\1\365\4\u03c3\2\0"+ - "\1\u03c3\15\0\1\u03c3\6\0\12\u03c3\1\u0360\13\0\1\264"+ - "\26\0\1\172\32\63\1\173\6\63\1\u02de\3\63\43\0"+ - "\1\172\15\63\1\301\14\63\1\173\12\63\43\0\1\172"+ - "\32\63\1\173\1\63\1\u03c4\10\63\43\0\1\172\30\63"+ - "\1\u03c5\1\63\1\173\12\63\43\0\1\172\6\63\1\u03c6"+ 
- "\23\63\1\173\12\63\43\0\1\172\32\63\1\173\5\63"+ - "\1\u03c7\4\63\43\0\1\172\22\63\1\374\7\63\1\173"+ - "\12\63\43\0\1\172\32\63\1\173\5\63\1\u03c8\4\63"+ - "\43\0\1\172\32\63\1\173\1\63\1\301\10\63\43\0"+ - "\1\172\13\63\1\u03c9\16\63\1\173\12\63\43\0\1\221"+ - "\32\117\1\36\7\117\1\u03ca\2\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\32\117\1\36\10\117\1\142\1\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\32\117\1\36\4\117\1\u03cb\5\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\16\117\1\u03cc\13\117\1\36\12\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - "\4\0\2\33\16\0\1\221\26\117\1\u03cd\3\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\16\0\1\221\32\117\1\36\7\117"+ - "\1\u03ce\2\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\64\0\1\u033f\43\0\4\u03cf"+ - "\2\0\1\u03cf\15\0\1\u03cf\6\0\12\u03cf\1\u03aa\43\0"+ - "\4\u03d0\2\0\1\u03d0\15\0\1\u03d0\6\0\12\u03d0\1\u03d1"+ - "\43\0\4\u03d2\2\0\1\u03d2\15\0\1\u03d2\6\0\12\u03d2"+ - "\1\u03d3\13\0\1\u0273\26\0\1\u02bf\4\u03d2\2\0\1\u03d2"+ - "\15\0\1\u03d2\6\0\12\u03d4\1\u03d3\13\0\1\u0273\26\0"+ - "\1\u02bf\4\u03d2\2\0\1\u03d2\15\0\1\u03d2\6\0\12\u03d5"+ - "\1\u03d3\13\0\1\u0273\26\0\1\u02bf\4\u03d2\2\0\1\u03d2"+ - "\15\0\1\u03d2\6\0\1\u03d4\1\u03d6\1\u03d5\2\u03d4\2\u03d5"+ - "\2\u03d4\1\u03d5\1\u03d3\13\0\1\u0273\27\0\4\u03d7\2\0"+ - "\1\u03d7\15\0\1\u03d7\6\0\12\u03d7\1\u037a\13\0\1\u0273"+ - "\26\0\1\u02bf\4\u03d7\2\0\1\u03d7\15\0\1\u03d7\6\0"+ - "\12\u03d7\1\u037a\13\0\1\u0273\62\0\1\u03d8\2\u03d9\1\u03d8"+ - "\4\u03d9\1\u03da\1\u03d9\43\0\1\u03b1\110\0\1\u03b1\33\0"+ - "\2\u03b2\1\0\2\u03b2\2\0\2\u03b2\45\0\4\u03db\2\0"+ - "\1\u03db\15\0\1\u03db\6\0\12\u03db\1\u0385\43\0\4\u03dc"+ - "\2\0\1\u03dc\15\0\1\u03dc\6\0\12\u03dc\1\u03dd\43\0"+ - "\4\u03de\2\0\1\u03de\15\0\1\u03de\6\0\1\u03df\2\u03e0"+ - "\1\u03df\4\u03e0\1\u03e1\1\u03e0\14\0\1\u0230\27\0\4\u03e2"+ - "\2\0\1\u03e2\15\0\1\u03e2\6\0\12\u03e2\1\u03b8\13\0"+ - "\1\u0230\27\0\4\u03de\2\0\1\u03de\15\0\1\u03de\6\0"+ - "\1\u03df\2\u03e0\1\u03df\4\u03e0\1\u03e1\1\u03e0\43\0\1\u0280"+ - "\4\u03e2\2\0\1\u03e2\15\0\1\u03e2\6\0\12\u03e3\1\u03b8"+ - "\13\0\1\u0230\26\0\1\u0280\4\u03e2\2\0\1\u03e2\15\0"+ - "\1\u03e2\6\0\12\u03e2\1\u03b8\13\0\1\u0230\26\0\1\u0280"+ - "\4\u03e2\2\0\1\u03e2\15\0\1\u03e2\6\0\2\u03e3\1\u03e2"+ - "\2\u03e3\2\u03e2\2\u03e3\1\u03e2\1\u03b8\13\0\1\u0230\74\0"+ - "\1\u0350\13\0\1\u0230\62\0\12\u03be\14\0\1\u0230\62\0"+ - "\12\u03e4\14\0\1\u0230\62\0\1\u03be\1\u03e5\1\u03e4\2\u03be"+ - "\2\u03e4\2\u03be\1\u03e4\14\0\1\u0230\27\0\4\u03e6\2\0"+ - "\1\u03e6\15\0\1\u03e6\6\0\12\u03e6\1\u0391\42\0\1\365"+ - "\4\u03e6\2\0\1\u03e6\15\0\1\u03e6\6\0\12\u03e6\1\u0391"+ - "\43\0\4\u03e7\2\0\1\u03e7\15\0\1\u03e7\6\0\12\u03e7"+ - "\14\0\1\264\74\0\1\u0360\13\0\1\264\26\0\1\172"+ - "\2\63\1\u03e8\27\63\1\173\12\63\43\0\1\172\3\63"+ - "\1\u03e9\26\63\1\173\12\63\43\0\1\172\32\63\1\173"+ - "\1\63\1\u03ea\10\63\43\0\1\172\3\63\1\u03eb\26\63"+ - "\1\173\12\63\43\0\1\172\1\u03ec\31\63\1\173\12\63"+ - "\43\0\1\172\26\63\1\u03ed\3\63\1\173\12\63\43\0"+ - "\1\221\1\u03ee\31\117\1\36\12\117\1\0\3\33\1\0"+ - "\2\33\1\34\3\33\3\0\1\33\4\0\2\33\16\0"+ - "\1\221\24\117\1\u03ef\5\117\1\36\12\117\1\0\3\33"+ - "\1\0\2\33\1\34\3\33\3\0\1\33\4\0\2\33"+ - "\16\0\1\221\1\117\1\u03f0\30\117\1\36\12\117\1\0"+ - "\3\33\1\0\2\33\1\34\3\33\3\0\1\33\4\0"+ - "\2\33\16\0\1\221\32\117\1\36\2\117\1\235\7\117"+ - "\1\0\3\33\1\0\2\33\1\34\3\33\3\0\1\33"+ - 
"\4\0\2\33\16\0\1\221\6\117\1\226\23\117\1\36"+ - "\12\117\1\0\3\33\1\0\2\33\1\34\3\33\3\0"+ - "\1\33\4\0\2\33\17\0\4\u03f1\2\0\1\u03f1\15\0"+ - "\1\u03f1\6\0\12\u03f1\1\u03aa\43\0\4\u03f2\2\0\1\u03f2"+ - "\15\0\1\u03f2\6\0\12\u03f2\1\u03f3\43\0\4\u03f4\2\0"+ - "\1\u03f4\15\0\1\u03f4\6\0\1\u03f5\2\u03f6\1\u03f5\4\u03f6"+ - "\1\u03f7\1\u03f6\14\0\1\u0273\27\0\4\u03f8\2\0\1\u03f8"+ - "\15\0\1\u03f8\6\0\12\u03f8\1\u03d3\13\0\1\u0273\27\0"+ - "\4\u03f4\2\0\1\u03f4\15\0\1\u03f4\6\0\1\u03f5\2\u03f6"+ - "\1\u03f5\4\u03f6\1\u03f7\1\u03f6\43\0\1\u02bf\4\u03f8\2\0"+ - "\1\u03f8\15\0\1\u03f8\6\0\12\u03f9\1\u03d3\13\0\1\u0273"+ - "\26\0\1\u02bf\4\u03f8\2\0\1\u03f8\15\0\1\u03f8\6\0"+ - "\12\u03f8\1\u03d3\13\0\1\u0273\26\0\1\u02bf\4\u03f8\2\0"+ - "\1\u03f8\15\0\1\u03f8\6\0\2\u03f9\1\u03f8\2\u03f9\2\u03f8"+ - "\2\u03f9\1\u03f8\1\u03d3\13\0\1\u0273\74\0\1\u037a\13\0"+ - "\1\u0273\62\0\12\u03d9\14\0\1\u0273\62\0\12\u03fa\14\0"+ - "\1\u0273\62\0\1\u03d9\1\u03fb\1\u03fa\2\u03d9\2\u03fa\2\u03d9"+ - "\1\u03fa\14\0\1\u0273\74\0\1\u0385\43\0\4\u03fc\2\0"+ - "\1\u03fc\15\0\1\u03fc\6\0\12\u03fc\1\u03dd\43\0\4\u03fd"+ - "\2\0\1\u03fd\15\0\1\u03fd\6\0\12\u03fd\1\u03fe\43\0"+ - "\4\u03ff\2\0\1\u03ff\15\0\1\u03ff\6\0\12\u03ff\1\u0400"+ - "\13\0\1\u0230\26\0\1\u0280\4\u03ff\2\0\1\u03ff\15\0"+ - "\1\u03ff\6\0\12\u0401\1\u0400\13\0\1\u0230\26\0\1\u0280"+ - "\4\u03ff\2\0\1\u03ff\15\0\1\u03ff\6\0\12\u0402\1\u0400"+ - "\13\0\1\u0230\26\0\1\u0280\4\u03ff\2\0\1\u03ff\15\0"+ - "\1\u03ff\6\0\1\u0401\1\u0403\1\u0402\2\u0401\2\u0402\2\u0401"+ - "\1\u0402\1\u0400\13\0\1\u0230\27\0\4\u0404\2\0\1\u0404"+ - "\15\0\1\u0404\6\0\12\u0404\1\u03b8\13\0\1\u0230\26\0"+ - "\1\u0280\4\u0404\2\0\1\u0404\15\0\1\u0404\6\0\12\u0404"+ - "\1\u03b8\13\0\1\u0230\110\0\1\u0230\62\0\2\u03e4\1\0"+ - "\2\u03e4\2\0\2\u03e4\15\0\1\u0230\74\0\1\u0391\43\0"+ - "\4\u028e\2\0\1\u028e\15\0\1\u028e\6\0\12\u028e\14\0"+ - "\1\264\26\0\1\172\32\63\1\173\7\63\1\u0405\2\63"+ - "\43\0\1\172\32\63\1\173\10\63\1\301\1\63\43\0"+ - "\1\172\32\63\1\173\4\63\1\u0406\5\63\43\0\1\172"+ - "\16\63\1\u0407\13\63\1\173\12\63\43\0\1\172\26\63"+ - "\1\u0408\3\63\1\173\12\63\43\0\1\172\32\63\1\173"+ - "\7\63\1\u0409\2\63\43\0\1\221\4\117\1\226\25\117"+ - "\1\36\12\117\1\0\3\33\1\0\2\33\1\34\3\33"+ - "\3\0\1\33\4\0\2\33\16\0\1\221\24\117\1\142"+ - "\5\117\1\36\12\117\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\16\0\1\221\32\117"+ - "\1\36\11\117\1\142\1\0\3\33\1\0\2\33\1\34"+ - "\3\33\3\0\1\33\4\0\2\33\64\0\1\u03aa\43\0"+ - "\4\u040a\2\0\1\u040a\15\0\1\u040a\6\0\12\u040a\1\u03f3"+ - "\43\0\4\u040b\2\0\1\u040b\15\0\1\u040b\6\0\12\u040b"+ - "\1\u040c\43\0\4\u040d\2\0\1\u040d\15\0\1\u040d\6\0"+ - "\12\u040d\1\u040e\13\0\1\u0273\26\0\1\u02bf\4\u040d\2\0"+ - "\1\u040d\15\0\1\u040d\6\0\12\u040f\1\u040e\13\0\1\u0273"+ - "\26\0\1\u02bf\4\u040d\2\0\1\u040d\15\0\1\u040d\6\0"+ - "\12\u0410\1\u040e\13\0\1\u0273\26\0\1\u02bf\4\u040d\2\0"+ - "\1\u040d\15\0\1\u040d\6\0\1\u040f\1\u0411\1\u0410\2\u040f"+ - "\2\u0410\2\u040f\1\u0410\1\u040e\13\0\1\u0273\27\0\4\u0412"+ - "\2\0\1\u0412\15\0\1\u0412\6\0\12\u0412\1\u03d3\13\0"+ - "\1\u0273\26\0\1\u02bf\4\u0412\2\0\1\u0412\15\0\1\u0412"+ - "\6\0\12\u0412\1\u03d3\13\0\1\u0273\110\0\1\u0273\62\0"+ - "\2\u03fa\1\0\2\u03fa\2\0\2\u03fa\15\0\1\u0273\27\0"+ - "\4\u0413\2\0\1\u0413\15\0\1\u0413\6\0\12\u0413\1\u03dd"+ - "\43\0\4\u0414\2\0\1\u0414\15\0\1\u0414\6\0\12\u0414"+ - "\1\u0415\43\0\4\u0416\2\0\1\u0416\15\0\1\u0416\6\0"+ - "\1\u0417\2\u0418\1\u0417\4\u0418\1\u0419\1\u0418\14\0\1\u0230"+ - "\27\0\4\u041a\2\0\1\u041a\15\0\1\u041a\6\0\12\u041a"+ - 
"\1\u0400\13\0\1\u0230\27\0\4\u0416\2\0\1\u0416\15\0"+ - "\1\u0416\6\0\1\u0417\2\u0418\1\u0417\4\u0418\1\u0419\1\u0418"+ - "\43\0\1\u0280\4\u041a\2\0\1\u041a\15\0\1\u041a\6\0"+ - "\12\u041b\1\u0400\13\0\1\u0230\26\0\1\u0280\4\u041a\2\0"+ - "\1\u041a\15\0\1\u041a\6\0\12\u041a\1\u0400\13\0\1\u0230"+ - "\26\0\1\u0280\4\u041a\2\0\1\u041a\15\0\1\u041a\6\0"+ - "\2\u041b\1\u041a\2\u041b\2\u041a\2\u041b\1\u041a\1\u0400\13\0"+ - "\1\u0230\74\0\1\u03b8\13\0\1\u0230\26\0\1\172\1\u041c"+ - "\31\63\1\173\12\63\43\0\1\172\24\63\1\u041d\5\63"+ - "\1\173\12\63\43\0\1\172\1\63\1\u041e\30\63\1\173"+ - "\12\63\43\0\1\172\32\63\1\173\2\63\1\u0103\7\63"+ - "\43\0\1\172\6\63\1\374\23\63\1\173\12\63\44\0"+ - "\4\u041f\2\0\1\u041f\15\0\1\u041f\6\0\12\u041f\1\u03f3"+ - "\43\0\4\u0420\2\0\1\u0420\15\0\1\u0420\6\0\12\u0420"+ - "\1\u0421\43\0\4\u0422\2\0\1\u0422\15\0\1\u0422\6\0"+ - "\1\u0423\2\u0424\1\u0423\4\u0424\1\u0425\1\u0424\14\0\1\u0273"+ - "\27\0\4\u0426\2\0\1\u0426\15\0\1\u0426\6\0\12\u0426"+ - "\1\u040e\13\0\1\u0273\27\0\4\u0422\2\0\1\u0422\15\0"+ - "\1\u0422\6\0\1\u0423\2\u0424\1\u0423\4\u0424\1\u0425\1\u0424"+ - "\43\0\1\u02bf\4\u0426\2\0\1\u0426\15\0\1\u0426\6\0"+ - "\12\u0427\1\u040e\13\0\1\u0273\26\0\1\u02bf\4\u0426\2\0"+ - "\1\u0426\15\0\1\u0426\6\0\12\u0426\1\u040e\13\0\1\u0273"+ - "\26\0\1\u02bf\4\u0426\2\0\1\u0426\15\0\1\u0426\6\0"+ - "\2\u0427\1\u0426\2\u0427\2\u0426\2\u0427\1\u0426\1\u040e\13\0"+ - "\1\u0273\74\0\1\u03d3\13\0\1\u0273\74\0\1\u03dd\43\0"+ - "\4\u0428\2\0\1\u0428\15\0\1\u0428\6\0\12\u0428\1\u0415"+ - "\43\0\4\u0429\2\0\1\u0429\15\0\1\u0429\6\0\1\u042a"+ - "\2\u042b\1\u042a\4\u042b\1\u042c\1\u042b\1\u042d\43\0\4\u042e"+ - "\2\0\1\u042e\15\0\1\u042e\6\0\12\u042e\1\u042f\13\0"+ - "\1\u0230\26\0\1\u0280\4\u042e\2\0\1\u042e\15\0\1\u042e"+ - "\6\0\12\u0430\1\u042f\13\0\1\u0230\26\0\1\u0280\4\u042e"+ - "\2\0\1\u042e\15\0\1\u042e\6\0\12\u0431\1\u042f\13\0"+ - "\1\u0230\26\0\1\u0280\4\u042e\2\0\1\u042e\15\0\1\u042e"+ - "\6\0\1\u0430\1\u0432\1\u0431\2\u0430\2\u0431\2\u0430\1\u0431"+ - "\1\u042f\13\0\1\u0230\27\0\4\u0433\2\0\1\u0433\15\0"+ - "\1\u0433\6\0\12\u0433\1\u0400\13\0\1\u0230\26\0\1\u0280"+ - "\4\u0433\2\0\1\u0433\15\0\1\u0433\6\0\12\u0433\1\u0400"+ - "\13\0\1\u0230\26\0\1\172\4\63\1\374\25\63\1\173"+ - "\12\63\43\0\1\172\24\63\1\301\5\63\1\173\12\63"+ - "\43\0\1\172\32\63\1\173\11\63\1\301\111\0\1\u03f3"+ - "\43\0\4\u0434\2\0\1\u0434\15\0\1\u0434\6\0\12\u0434"+ - "\1\u0421\43\0\4\u0435\2\0\1\u0435\15\0\1\u0435\6\0"+ - "\1\u0436\2\u0437\1\u0436\4\u0437\1\u0438\1\u0437\1\u0439\43\0"+ - "\4\u043a\2\0\1\u043a\15\0\1\u043a\6\0\12\u043a\1\u043b"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u043a\2\0\1\u043a\15\0"+ - "\1\u043a\6\0\12\u043c\1\u043b\13\0\1\u0273\26\0\1\u02bf"+ - "\4\u043a\2\0\1\u043a\15\0\1\u043a\6\0\12\u043d\1\u043b"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u043a\2\0\1\u043a\15\0"+ - "\1\u043a\6\0\1\u043c\1\u043e\1\u043d\2\u043c\2\u043d\2\u043c"+ - "\1\u043d\1\u043b\13\0\1\u0273\27\0\4\u043f\2\0\1\u043f"+ - "\15\0\1\u043f\6\0\12\u043f\1\u040e\13\0\1\u0273\26\0"+ - "\1\u02bf\4\u043f\2\0\1\u043f\15\0\1\u043f\6\0\12\u043f"+ - "\1\u040e\13\0\1\u0273\27\0\4\u0440\2\0\1\u0440\15\0"+ - "\1\u0440\6\0\12\u0440\1\u0415\43\0\4\u0441\2\0\1\u0441"+ - "\15\0\1\u0441\6\0\12\u0441\1\u0442\42\0\1\u0280\4\u0441"+ - "\2\0\1\u0441\15\0\1\u0441\6\0\12\u0443\1\u0442\42\0"+ - "\1\u0280\4\u0441\2\0\1\u0441\15\0\1\u0441\6\0\12\u0444"+ - "\1\u0442\42\0\1\u0280\4\u0441\2\0\1\u0441\15\0\1\u0441"+ - "\6\0\1\u0443\1\u0445\1\u0444\2\u0443\2\u0444\2\u0443\1\u0444"+ - "\1\u0442\43\0\4\u0446\2\0\1\u0446\15\0\1\u0446\6\0"+ - 
"\12\u0446\14\0\1\u0230\27\0\4\u0447\2\0\1\u0447\15\0"+ - "\1\u0447\6\0\12\u0447\1\u042f\13\0\1\u0230\27\0\4\u0446"+ - "\2\0\1\u0446\15\0\1\u0446\6\0\12\u0446\43\0\1\u0280"+ - "\4\u0447\2\0\1\u0447\15\0\1\u0447\6\0\12\u0448\1\u042f"+ - "\13\0\1\u0230\26\0\1\u0280\4\u0447\2\0\1\u0447\15\0"+ - "\1\u0447\6\0\12\u0447\1\u042f\13\0\1\u0230\26\0\1\u0280"+ - "\4\u0447\2\0\1\u0447\15\0\1\u0447\6\0\2\u0448\1\u0447"+ - "\2\u0448\2\u0447\2\u0448\1\u0447\1\u042f\13\0\1\u0230\74\0"+ - "\1\u0400\13\0\1\u0230\27\0\4\u0449\2\0\1\u0449\15\0"+ - "\1\u0449\6\0\12\u0449\1\u0421\43\0\4\u044a\2\0\1\u044a"+ - "\15\0\1\u044a\6\0\12\u044a\1\u044b\42\0\1\u02bf\4\u044a"+ - "\2\0\1\u044a\15\0\1\u044a\6\0\12\u044c\1\u044b\42\0"+ - "\1\u02bf\4\u044a\2\0\1\u044a\15\0\1\u044a\6\0\12\u044d"+ - "\1\u044b\42\0\1\u02bf\4\u044a\2\0\1\u044a\15\0\1\u044a"+ - "\6\0\1\u044c\1\u044e\1\u044d\2\u044c\2\u044d\2\u044c\1\u044d"+ - "\1\u044b\43\0\4\u044f\2\0\1\u044f\15\0\1\u044f\6\0"+ - "\12\u044f\14\0\1\u0273\27\0\4\u0450\2\0\1\u0450\15\0"+ - "\1\u0450\6\0\12\u0450\1\u043b\13\0\1\u0273\27\0\4\u044f"+ - "\2\0\1\u044f\15\0\1\u044f\6\0\12\u044f\43\0\1\u02bf"+ - "\4\u0450\2\0\1\u0450\15\0\1\u0450\6\0\12\u0451\1\u043b"+ - "\13\0\1\u0273\26\0\1\u02bf\4\u0450\2\0\1\u0450\15\0"+ - "\1\u0450\6\0\12\u0450\1\u043b\13\0\1\u0273\26\0\1\u02bf"+ - "\4\u0450\2\0\1\u0450\15\0\1\u0450\6\0\2\u0451\1\u0450"+ - "\2\u0451\2\u0450\2\u0451\1\u0450\1\u043b\13\0\1\u0273\74\0"+ - "\1\u040e\13\0\1\u0273\74\0\1\u0415\43\0\4\u0452\2\0"+ - "\1\u0452\15\0\1\u0452\6\0\12\u0452\1\u0442\43\0\4\u0446"+ - "\2\0\1\u0446\15\0\1\u0446\6\0\12\u0446\1\u03e4\42\0"+ - "\1\u0280\4\u0452\2\0\1\u0452\15\0\1\u0452\6\0\12\u0453"+ - "\1\u0442\42\0\1\u0280\4\u0452\2\0\1\u0452\15\0\1\u0452"+ - "\6\0\12\u0452\1\u0442\42\0\1\u0280\4\u0452\2\0\1\u0452"+ - "\15\0\1\u0452\6\0\2\u0453\1\u0452\2\u0453\2\u0452\2\u0453"+ - "\1\u0452\1\u0442\43\0\4\u0454\2\0\1\u0454\15\0\1\u0454"+ - "\6\0\12\u0454\14\0\1\u0230\27\0\4\u0455\2\0\1\u0455"+ - "\15\0\1\u0455\6\0\12\u0455\1\u042f\13\0\1\u0230\26\0"+ - "\1\u0280\4\u0455\2\0\1\u0455\15\0\1\u0455\6\0\12\u0455"+ - "\1\u042f\13\0\1\u0230\74\0\1\u0421\43\0\4\u0456\2\0"+ - "\1\u0456\15\0\1\u0456\6\0\12\u0456\1\u044b\43\0\4\u044f"+ - "\2\0\1\u044f\15\0\1\u044f\6\0\12\u044f\1\u03fa\42\0"+ - "\1\u02bf\4\u0456\2\0\1\u0456\15\0\1\u0456\6\0\12\u0457"+ - "\1\u044b\42\0\1\u02bf\4\u0456\2\0\1\u0456\15\0\1\u0456"+ - "\6\0\12\u0456\1\u044b\42\0\1\u02bf\4\u0456\2\0\1\u0456"+ - "\15\0\1\u0456\6\0\2\u0457\1\u0456\2\u0457\2\u0456\2\u0457"+ - "\1\u0456\1\u044b\43\0\4\u0458\2\0\1\u0458\15\0\1\u0458"+ - "\6\0\12\u0458\14\0\1\u0273\27\0\4\u0459\2\0\1\u0459"+ - "\15\0\1\u0459\6\0\12\u0459\1\u043b\13\0\1\u0273\26\0"+ - "\1\u02bf\4\u0459\2\0\1\u0459\15\0\1\u0459\6\0\12\u0459"+ - "\1\u043b\13\0\1\u0273\27\0\4\u045a\2\0\1\u045a\15\0"+ - "\1\u045a\6\0\12\u045a\1\u0442\42\0\1\u0280\4\u045a\2\0"+ - "\1\u045a\15\0\1\u045a\6\0\12\u045a\1\u0442\43\0\4\u045b"+ - "\2\0\1\u045b\15\0\1\u045b\6\0\12\u045b\14\0\1\u0230"+ - "\74\0\1\u042f\13\0\1\u0230\27\0\4\u045c\2\0\1\u045c"+ - "\15\0\1\u045c\6\0\12\u045c\1\u044b\42\0\1\u02bf\4\u045c"+ - "\2\0\1\u045c\15\0\1\u045c\6\0\12\u045c\1\u044b\43\0"+ - "\4\u045d\2\0\1\u045d\15\0\1\u045d\6\0\12\u045d\14\0"+ - "\1\u0273\74\0\1\u043b\13\0\1\u0273\74\0\1\u0442\43\0"+ - "\4\u03e4\2\0\1\u03e4\15\0\1\u03e4\6\0\12\u03e4\14\0"+ - "\1\u0230\74\0\1\u044b\43\0\4\u03fa\2\0\1\u03fa\15\0"+ - "\1\u03fa\6\0\12\u03fa\14\0\1\u0273\15\0"; + "\1\2\1\3\1\2\1\4\1\2\1\5\1\2\1\6"+ + "\1\2\1\7\1\2\1\10\3\2\1\11\5\2\1\12"+ + "\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+ + 
"\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+ + "\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+ + "\3\2\1\30\2\31\1\32\1\33\1\34\6\35\1\36"+ + "\16\35\1\37\4\35\1\34\1\40\2\41\1\40\4\41"+ + "\1\42\1\41\1\2\1\34\1\43\1\34\1\2\2\34"+ + "\1\2\3\34\1\44\2\2\1\34\1\45\3\2\2\34"+ + "\1\2\244\0\1\25\11\0\1\25\20\0\1\25\22\0"+ + "\1\25\10\0\3\25\17\0\1\25\10\0\1\25\117\0"+ + "\1\25\1\0\1\25\1\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\3\25\1\0\5\25\1\0\3\25\1\0"+ + "\11\25\1\0\2\25\1\0\16\25\1\0\2\25\1\0"+ + "\21\25\1\0\1\25\1\0\3\25\2\0\1\25\1\0"+ + "\1\25\1\0\2\25\1\0\1\25\112\0\1\25\3\0"+ + "\1\25\5\0\2\25\3\0\1\25\13\0\1\25\1\0"+ + "\1\25\4\0\2\25\4\0\1\25\1\0\1\25\3\0"+ + "\2\25\1\0\1\25\5\0\3\25\1\0\1\25\15\0"+ + "\1\25\10\0\1\25\117\0\1\25\3\0\1\25\1\0"+ + "\1\25\1\0\1\25\1\0\3\25\2\0\4\25\1\0"+ + "\3\25\2\0\3\25\1\0\4\25\1\0\2\25\2\0"+ + "\3\25\1\0\11\25\1\0\2\25\1\0\16\25\1\0"+ + "\2\25\1\0\1\25\1\0\3\25\2\0\1\25\1\0"+ + "\1\25\1\0\2\25\1\0\1\25\112\0\1\25\3\0"+ + "\1\25\3\0\1\25\1\0\3\25\2\0\1\25\1\0"+ + "\2\25\1\0\3\25\3\0\2\25\1\0\1\25\1\0"+ + "\2\25\1\0\2\25\3\0\2\25\1\0\1\25\1\0"+ + "\1\25\1\0\2\25\1\0\2\25\1\0\2\25\1\0"+ + "\5\25\1\0\5\25\1\0\2\25\1\0\2\25\1\0"+ + "\1\25\1\0\3\25\4\0\1\25\4\0\1\25\124\0"+ + "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+ + "\1\25\14\0\1\25\5\0\1\25\11\0\2\25\12\0"+ + "\1\26\1\0\2\25\12\0\1\25\117\0\1\25\1\0"+ + "\1\26\7\0\2\25\2\0\5\25\2\0\2\25\4\0"+ + "\6\25\1\0\2\25\4\0\5\25\1\0\5\25\1\0"+ + "\2\25\1\0\3\25\1\0\4\25\1\0\5\25\1\26"+ + "\1\0\1\25\1\0\1\25\1\0\3\25\2\0\1\25"+ + "\1\0\1\25\1\0\1\25\2\0\1\25\112\0\1\25"+ + "\3\0\1\25\5\0\2\25\3\0\1\25\4\0\3\25"+ + "\4\0\1\25\1\0\1\25\2\0\1\25\1\0\2\25"+ + "\4\0\1\25\1\0\1\25\3\0\2\25\1\0\1\25"+ + "\5\0\3\25\1\0\1\25\10\0\1\25\1\0\2\26"+ + "\1\0\1\25\10\0\1\25\117\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\3\0\2\25"+ + "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+ + "\114\0\1\25\11\0\2\25\17\0\1\25\6\0\2\25"+ + "\4\0\1\25\5\0\1\25\2\0\1\25\5\0\3\25"+ + "\1\0\1\25\15\0\1\25\10\0\1\25\117\0\1\25"+ + "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+ + "\1\0\1\25\5\0\1\25\7\0\1\25\2\0\1\25"+ + "\5\0\1\25\2\0\1\25\1\0\1\25\201\0\1\33"+ + "\21\0\1\27\130\0\1\32\3\0\1\32\3\0\1\32"+ + "\1\0\3\32\2\0\1\32\2\0\1\32\1\0\3\32"+ + "\3\0\2\32\1\0\1\32\1\0\2\32\1\0\2\32"+ + "\3\0\2\32\1\0\1\32\3\0\2\32\1\0\2\32"+ + "\1\0\2\32\1\0\5\32\1\0\5\32\2\0\1\32"+ + "\1\0\2\32\1\0\1\32\1\0\3\32\4\0\1\32"+ + "\4\0\1\32\112\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\3\32\1\0\5\32"+ + "\1\0\3\32\1\0\11\32\1\0\2\32\1\0\16\32"+ + "\1\0\2\32\1\0\21\32\1\0\1\32\1\0\3\32"+ + "\2\0\1\32\1\0\1\32\1\0\2\32\1\0\1\32"+ + "\112\0\1\32\1\0\1\32\1\0\1\32\3\0\1\32"+ + "\1\0\3\32\1\0\2\32\1\0\2\32\1\0\3\32"+ + "\1\0\11\32\1\0\2\32\1\0\16\32\1\0\2\32"+ + "\1\0\21\32\1\0\1\32\1\0\3\32\2\0\1\32"+ + "\1\0\1\32\1\0\2\32\1\0\1\32\112\0\1\32"+ + "\11\0\1\32\20\0\1\32\33\0\1\32\21\0\1\32"+ + "\10\0\1\32\117\0\1\32\1\0\1\32\1\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\3\32\1\0\5\32"+ + "\1\0\3\32\1\0\6\32\1\0\2\32\1\0\2\32"+ + "\1\0\10\32\1\0\5\32\1\0\2\32\1\0\21\32"+ + "\1\0\1\32\1\0\3\32\2\0\1\32\1\0\1\32"+ + "\1\0\2\32\1\0\1\32\241\0\1\33\111\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\64\32\25\1\0\12\63\1\64"+ + "\1\0\1\65\3\0\1\64\20\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\66"+ + "\3\0\1\67\5\0\1\70\3\0\1\71\11\0\1\57"+ + 
"\2\0\1\72\16\0\1\73\2\0\1\74\41\0\1\25"+ + "\2\26\2\0\2\75\1\76\1\0\1\26\2\0\1\75"+ + "\32\25\1\0\12\26\2\0\1\76\2\0\2\75\6\0"+ + "\1\75\23\0\1\77\15\0\1\100\14\0\1\101\16\0"+ + "\1\102\2\0\1\103\21\0\1\104\20\0\1\27\1\0"+ + "\1\27\3\0\1\65\1\0\1\27\52\0\1\65\24\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\105\3\0\1\67\5\0\1\70\3\0"+ + "\1\106\11\0\1\57\2\0\1\107\16\0\1\110\2\0"+ + "\1\111\21\0\1\104\17\0\1\25\1\112\1\26\1\27"+ + "\3\0\1\112\1\0\1\112\3\0\32\25\1\0\12\26"+ + "\2\0\1\112\165\0\2\31\244\0\1\113\45\114\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\53\3\0\1\54\5\0"+ + "\1\55\3\0\1\56\11\0\1\57\2\0\1\60\16\0"+ + "\1\61\2\0\1\62\41\0\2\25\1\63\1\0\1\64"+ + "\1\0\1\64\1\65\1\0\1\25\2\0\1\116\32\35"+ + "\1\117\12\120\1\64\1\114\1\121\1\114\1\0\1\114"+ + "\1\122\1\115\3\114\3\0\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\53\3\0\1\54\5\0\1\55\3\0"+ + "\1\56\11\0\1\57\2\0\1\60\16\0\1\61\2\0"+ + "\1\62\41\0\2\25\1\63\1\0\1\64\1\0\1\64"+ + "\1\65\1\0\1\25\2\0\1\116\10\35\1\123\6\35"+ + "\1\124\12\35\1\117\12\120\1\64\1\114\1\121\1\114"+ + "\1\0\1\114\1\122\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\53\3\0\1\54\5\0"+ + "\1\55\3\0\1\56\11\0\1\57\2\0\1\60\16\0"+ + "\1\61\2\0\1\62\41\0\2\25\1\63\1\0\1\64"+ + "\1\0\1\64\1\65\1\0\1\25\2\0\1\116\17\35"+ + "\1\125\12\35\1\117\12\120\1\64\1\114\1\121\1\114"+ + "\1\0\1\114\1\122\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\66\3\0\1\67\5\0"+ + "\1\70\3\0\1\71\11\0\1\57\2\0\1\72\16\0"+ + "\1\73\2\0\1\74\41\0\1\25\2\26\2\0\2\75"+ + "\1\76\1\0\1\26\2\0\1\126\32\35\1\117\12\41"+ + "\1\0\1\114\1\127\1\114\1\0\2\130\1\115\3\114"+ + "\2\0\1\75\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\66\3\0\1\67\5\0\1\70\3\0\1\71\11\0"+ + "\1\57\2\0\1\72\16\0\1\73\2\0\1\74\41\0"+ + "\1\25\2\26\2\0\2\75\1\76\1\0\1\26\2\0"+ + "\1\126\32\35\1\117\12\131\1\0\1\114\1\127\1\114"+ + "\1\0\2\130\1\115\3\114\2\0\1\75\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\66\3\0\1\67\5\0"+ + "\1\70\3\0\1\71\11\0\1\57\2\0\1\72\16\0"+ + "\1\73\2\0\1\74\41\0\1\25\2\26\2\0\2\75"+ + "\1\76\1\0\1\26\2\0\1\126\32\35\1\117\1\41"+ + "\1\132\1\131\2\41\2\131\2\41\1\131\1\0\1\114"+ + "\1\127\1\114\1\0\2\130\1\115\3\114\2\0\1\75"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\105\3\0"+ + "\1\67\5\0\1\70\3\0\1\106\11\0\1\57\2\0"+ + "\1\107\16\0\1\110\2\0\1\111\21\0\1\104\17\0"+ + "\1\25\1\112\1\26\1\27\3\0\1\112\1\0\1\112"+ + "\2\0\1\113\32\133\1\114\12\134\1\0\1\114\1\135"+ + "\1\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\150\0\4\136\2\0\1\136\15\0\1\136\6\0"+ + "\12\136\1\137\173\0\65\140\1\141\1\140\1\142\1\0"+ + "\2\140\3\0\1\25\11\0\3\25\5\0\1\25\1\0"+ + "\1\25\1\0\1\25\4\0\1\25\4\0\1\25\1\0"+ + "\2\25\4\0\1\25\5\0\1\25\3\0\1\25\4\0"+ + "\5\25\10\0\1\63\1\0\2\25\1\0\1\25\10\0"+ + "\1\25\117\0\1\25\1\0\1\63\7\0\2\25\2\0"+ + "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+ + "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+ + "\4\25\1\0\5\25\1\63\1\0\1\25\1\0\1\25"+ + "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+ + "\2\0\1\25\112\0\1\25\3\0\1\25\5\0\2\25"+ + "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+ + "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+ + "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\63\1\0\1\25\10\0\1\25"+ + "\117\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+ + 
"\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+ + "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+ + "\1\0\3\25\1\0\1\25\1\0\2\25\4\0\3\25"+ + "\1\0\1\25\10\0\1\25\1\0\2\25\114\0\1\25"+ + "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+ + "\1\0\1\25\5\0\3\25\5\0\1\25\2\0\2\25"+ + "\4\0\1\25\2\0\1\25\1\0\1\25\176\0\2\25"+ + "\6\0\1\25\151\0\1\25\3\0\1\25\2\0\1\25"+ + "\3\0\1\25\5\0\1\25\7\0\1\25\4\0\2\25"+ + "\3\0\2\25\1\0\1\25\4\0\1\25\1\0\1\25"+ + "\2\0\2\25\1\0\3\25\1\0\1\25\2\0\4\25"+ + "\2\0\1\25\134\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\143\3\0\1\54"+ + "\5\0\1\55\3\0\1\144\11\0\1\57\2\0\1\145"+ + "\16\0\1\146\2\0\1\147\41\0\1\25\2\63\2\0"+ + "\2\150\1\65\1\0\1\63\2\0\1\150\32\25\1\0"+ + "\12\63\2\0\1\65\2\0\2\150\6\0\1\150\11\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\151\3\0\1\152\5\0\1\153\3\0"+ + "\1\154\11\0\1\57\2\0\1\155\16\0\1\156\2\0"+ + "\1\157\41\0\1\25\1\64\7\0\1\64\3\0\32\25"+ + "\42\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\160\3\0\1\54\5\0\1\55"+ + "\3\0\1\161\11\0\1\57\2\0\1\162\16\0\1\163"+ + "\2\0\1\164\21\0\1\104\17\0\1\25\1\65\1\63"+ + "\1\27\3\0\1\65\1\0\1\65\3\0\32\25\1\0"+ + "\12\63\2\0\1\65\25\0\1\26\11\0\3\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+ + "\1\26\1\0\2\26\4\0\1\25\5\0\1\25\3\0"+ + "\1\26\4\0\1\26\2\25\2\26\10\0\1\26\1\0"+ + "\2\25\1\0\1\26\10\0\1\25\117\0\1\25\3\0"+ + "\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+ + "\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+ + "\1\26\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+ + "\1\25\1\0\2\25\114\0\1\25\3\0\1\25\5\0"+ + "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+ + "\1\25\2\26\5\0\1\25\2\0\1\25\1\26\4\0"+ + "\1\25\2\0\1\25\1\0\1\25\176\0\2\26\6\0"+ + "\1\26\151\0\1\26\3\0\1\26\2\0\1\26\3\0"+ + "\1\26\5\0\1\26\7\0\1\26\4\0\2\26\3\0"+ + "\2\26\1\0\1\26\4\0\1\26\1\0\1\26\2\0"+ + "\2\26\1\0\3\26\1\0\1\26\2\0\4\26\2\0"+ + "\1\26\146\0\1\165\3\0\1\166\5\0\1\167\3\0"+ + "\1\170\14\0\1\171\16\0\1\172\2\0\1\173\42\0"+ + "\1\75\1\26\6\0\1\75\36\0\12\26\27\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\174\3\0\1\67\5\0\1\70\3\0\1\175"+ + "\11\0\1\57\2\0\1\176\16\0\1\177\2\0\1\200"+ + "\21\0\1\104\17\0\1\25\1\76\1\26\1\27\3\0"+ + "\1\76\1\0\1\76\3\0\32\25\1\0\12\26\2\0"+ + "\1\76\25\0\1\27\37\0\1\27\1\0\2\27\16\0"+ + "\1\27\4\0\1\27\2\0\2\27\15\0\1\27\225\0"+ + "\1\27\246\0\2\27\11\0\1\27\210\0\2\27\6\0"+ + "\1\27\151\0\1\27\3\0\1\27\2\0\1\27\3\0"+ + "\1\27\5\0\1\27\7\0\1\27\4\0\2\27\3\0"+ + "\2\27\1\0\1\27\4\0\1\27\1\0\1\27\2\0"+ + "\2\27\1\0\3\27\1\0\1\27\2\0\4\27\2\0"+ + "\1\27\246\0\1\27\130\0\1\112\11\0\3\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+ + "\1\112\1\0\2\112\4\0\1\25\5\0\1\25\3\0"+ + "\1\112\4\0\1\112\2\25\2\112\10\0\1\26\1\0"+ + "\2\25\1\0\1\112\10\0\1\25\117\0\1\25\3\0"+ + "\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+ + "\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+ + "\1\112\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+ + "\1\25\1\0\2\25\114\0\1\25\3\0\1\25\5\0"+ + "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+ + "\1\25\2\112\5\0\1\25\2\0\1\25\1\112\4\0"+ + "\1\25\2\0\1\25\1\0\1\25\176\0\2\112\6\0"+ + "\1\112\151\0\1\112\3\0\1\112\2\0\1\112\3\0"+ + "\1\112\5\0\1\112\7\0\1\112\4\0\2\112\3\0"+ + "\2\112\1\0\1\112\4\0\1\112\1\0\1\112\2\0"+ + "\2\112\1\0\3\112\1\0\1\112\2\0\4\112\2\0"+ + "\1\112\301\0\1\113\45\114\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\1\140\3\0\2\114\150\0"+ + "\32\201\1\0\12\201\13\0\1\202\13\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\151\3\0\1\152\5\0\1\153\3\0\1\154\11\0"+ + 
"\1\57\2\0\1\155\16\0\1\156\2\0\1\157\41\0"+ + "\1\25\1\64\7\0\1\64\2\0\1\113\1\203\1\204"+ + "\1\205\1\206\1\207\1\210\1\211\1\212\1\213\1\214"+ + "\1\215\1\216\1\217\1\220\1\221\1\222\1\223\1\224"+ + "\1\225\1\226\1\227\1\230\1\231\1\232\1\233\1\234"+ + "\1\114\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\1\140\3\0\2\114\147\0\1\113\32\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\143"+ + "\3\0\1\54\5\0\1\55\3\0\1\144\11\0\1\57"+ + "\2\0\1\145\16\0\1\146\2\0\1\147\41\0\1\25"+ + "\2\63\2\0\2\150\1\65\1\0\1\63\2\0\1\236"+ + "\32\35\1\117\12\120\1\0\1\114\1\121\1\114\1\0"+ + "\2\237\1\115\3\114\2\0\1\150\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\160\3\0\1\54\5\0\1\55"+ + "\3\0\1\161\11\0\1\57\2\0\1\162\16\0\1\163"+ + "\2\0\1\164\21\0\1\104\17\0\1\25\1\65\1\63"+ + "\1\27\3\0\1\65\1\0\1\65\2\0\1\113\32\133"+ + "\1\114\12\240\1\0\1\114\1\121\1\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\151\3\0\1\152\5\0\1\153\3\0\1\154"+ + "\11\0\1\57\2\0\1\155\16\0\1\156\2\0\1\157"+ + "\41\0\1\25\1\64\7\0\1\64\2\0\1\113\32\133"+ + "\13\114\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\11\35\1\241\20\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\15\35\1\242\14\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\17\35\1\243\12\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\14\0\1\165\3\0\1\166\5\0"+ + "\1\167\3\0\1\170\14\0\1\171\16\0\1\172\2\0"+ + "\1\173\42\0\1\75\1\26\6\0\1\75\2\0\1\113"+ + "\1\244\1\245\1\246\1\247\1\250\1\251\1\252\1\253"+ + "\1\254\1\255\1\256\1\257\1\260\1\261\1\262\1\263"+ + "\1\264\1\265\1\266\1\267\1\270\1\271\1\272\1\273"+ + "\1\274\1\275\1\114\1\276\2\277\1\276\4\277\1\300"+ + "\1\277\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\1\140\3\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\174"+ + "\3\0\1\67\5\0\1\70\3\0\1\175\11\0\1\57"+ + "\2\0\1\176\16\0\1\177\2\0\1\200\21\0\1\104"+ + "\17\0\1\25\1\76\1\26\1\27\3\0\1\76\1\0"+ + "\1\76\2\0\1\113\32\133\1\114\12\134\1\0\1\114"+ + "\1\127\1\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\14\0\1\165\3\0\1\166\5\0\1\167"+ + "\3\0\1\170\14\0\1\171\16\0\1\172\2\0\1\173"+ + "\42\0\1\75\1\26\6\0\1\75\2\0\1\113\33\114"+ + "\12\134\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\66\3\0"+ + "\1\67\5\0\1\70\3\0\1\71\11\0\1\57\2\0"+ + "\1\72\16\0\1\73\2\0\1\74\41\0\1\25\2\26"+ + "\2\0\2\75\1\76\1\0\1\26\2\0\1\126\32\35"+ + "\1\117\12\301\1\0\1\114\1\127\1\114\1\0\2\130"+ + "\1\115\3\114\2\0\1\75\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + 
"\1\52\1\0\1\66\3\0\1\67\5\0\1\70\3\0"+ + "\1\71\11\0\1\57\2\0\1\72\16\0\1\73\2\0"+ + "\1\74\41\0\1\25\2\26\2\0\2\75\1\76\1\0"+ + "\1\26\2\0\1\126\32\35\1\117\2\131\1\301\2\131"+ + "\2\301\2\131\1\301\1\0\1\114\1\127\1\114\1\0"+ + "\2\130\1\115\3\114\2\0\1\75\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\302\32\133\1\114"+ + "\12\240\1\64\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\66\3\0\1\67\5\0\1\70\3\0\1\71"+ + "\11\0\1\57\2\0\1\72\16\0\1\73\2\0\1\74"+ + "\41\0\1\25\2\26\2\0\2\75\1\76\1\0\1\26"+ + "\2\0\1\303\32\133\1\114\12\134\1\0\1\114\1\127"+ + "\1\114\1\0\2\130\1\115\3\114\2\0\1\75\1\114"+ + "\4\0\2\114\150\0\4\304\2\0\1\304\15\0\1\304"+ + "\6\0\12\304\1\305\241\0\1\306\173\0\1\307\54\0"+ + "\1\115\164\0\74\140\2\0\1\63\11\0\3\25\5\0"+ + "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+ + "\1\63\1\0\2\63\4\0\1\25\5\0\1\25\3\0"+ + "\1\63\4\0\1\63\2\25\2\63\10\0\1\63\1\0"+ + "\2\25\1\0\1\63\10\0\1\25\117\0\1\25\3\0"+ + "\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+ + "\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+ + "\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+ + "\1\63\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+ + "\1\25\1\0\2\25\114\0\1\25\3\0\1\25\5\0"+ + "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+ + "\1\25\2\63\5\0\1\25\2\0\1\25\1\63\4\0"+ + "\1\25\2\0\1\25\1\0\1\25\176\0\2\63\6\0"+ + "\1\63\151\0\1\63\3\0\1\63\2\0\1\63\3\0"+ + "\1\63\5\0\1\63\7\0\1\63\4\0\2\63\3\0"+ + "\2\63\1\0\1\63\4\0\1\63\1\0\1\63\2\0"+ + "\2\63\1\0\3\63\1\0\1\63\2\0\4\63\2\0"+ + "\1\63\146\0\1\310\3\0\1\311\5\0\1\312\3\0"+ + "\1\313\14\0\1\314\16\0\1\315\2\0\1\316\42\0"+ + "\1\150\1\63\6\0\1\150\36\0\12\63\30\0\1\64"+ + "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\4\0\1\25\4\0\1\64\1\0\2\64\4\0\1\25"+ + "\5\0\1\25\3\0\1\64\4\0\1\64\2\25\2\64"+ + "\12\0\2\25\1\0\1\64\10\0\1\25\117\0\1\25"+ + "\11\0\2\25\2\0\5\25\2\0\2\25\4\0\6\25"+ + "\1\0\2\25\4\0\5\25\1\0\5\25\1\0\2\25"+ + "\1\0\3\25\1\0\4\25\1\0\5\25\2\0\1\25"+ + "\1\0\1\25\1\0\3\25\2\0\1\25\1\0\1\25"+ + "\1\0\1\25\2\0\1\25\112\0\1\25\3\0\1\25"+ + "\5\0\2\25\3\0\1\25\4\0\3\25\4\0\1\25"+ + "\1\0\1\25\2\0\1\25\1\0\2\25\4\0\1\25"+ + "\1\0\1\25\3\0\2\25\1\0\1\25\5\0\3\25"+ + "\1\0\1\25\10\0\1\25\4\0\1\25\10\0\1\25"+ + "\117\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+ + "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+ + "\1\0\3\25\1\0\1\64\1\0\2\25\4\0\3\25"+ + "\1\0\1\25\10\0\1\25\1\0\2\25\114\0\1\25"+ + "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+ + "\1\0\1\25\5\0\1\25\2\64\5\0\1\25\2\0"+ + "\1\25\1\64\4\0\1\25\2\0\1\25\1\0\1\25"+ + "\176\0\2\64\6\0\1\64\151\0\1\64\3\0\1\64"+ + "\2\0\1\64\3\0\1\64\5\0\1\64\7\0\1\64"+ + "\4\0\2\64\3\0\2\64\1\0\1\64\4\0\1\64"+ + "\1\0\1\64\2\0\2\64\1\0\3\64\1\0\1\64"+ + "\2\0\4\64\2\0\1\64\135\0\1\65\11\0\3\25"+ + "\5\0\1\25\1\0\1\25\1\0\1\25\4\0\1\25"+ + "\4\0\1\65\1\0\2\65\4\0\1\25\5\0\1\25"+ + "\3\0\1\65\4\0\1\65\2\25\2\65\10\0\1\63"+ + "\1\0\2\25\1\0\1\65\10\0\1\25\117\0\1\25"+ + "\3\0\1\25\6\0\2\25\5\0\1\25\1\0\1\25"+ + "\1\0\1\25\1\0\11\25\2\0\1\25\4\0\1\25"+ + "\4\0\6\25\2\0\1\25\1\0\1\25\1\0\3\25"+ + "\1\0\1\65\1\0\2\25\4\0\3\25\1\0\1\25"+ + "\10\0\1\25\1\0\2\25\114\0\1\25\3\0\1\25"+ + "\5\0\1\25\32\0\15\25\5\0\3\25\1\0\1\25"+ + "\5\0\1\25\2\65\5\0\1\25\2\0\1\25\1\65"+ + "\4\0\1\25\2\0\1\25\1\0\1\25\176\0\2\65"+ + "\6\0\1\65\151\0\1\65\3\0\1\65\2\0\1\65"+ + 
"\3\0\1\65\5\0\1\65\7\0\1\65\4\0\2\65"+ + "\3\0\2\65\1\0\1\65\4\0\1\65\1\0\1\65"+ + "\2\0\2\65\1\0\3\65\1\0\1\65\2\0\4\65"+ + "\2\0\1\65\135\0\1\75\37\0\1\75\1\0\2\75"+ + "\16\0\1\75\4\0\1\75\2\0\2\75\10\0\1\26"+ + "\4\0\1\75\132\0\1\26\102\0\1\26\242\0\2\26"+ + "\227\0\1\75\246\0\2\75\11\0\1\75\210\0\2\75"+ + "\6\0\1\75\151\0\1\75\3\0\1\75\2\0\1\75"+ + "\3\0\1\75\5\0\1\75\7\0\1\75\4\0\2\75"+ + "\3\0\2\75\1\0\1\75\4\0\1\75\1\0\1\75"+ + "\2\0\2\75\1\0\3\75\1\0\1\75\2\0\4\75"+ + "\2\0\1\75\135\0\1\76\11\0\3\25\5\0\1\25"+ + "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\76"+ + "\1\0\2\76\4\0\1\25\5\0\1\25\3\0\1\76"+ + "\4\0\1\76\2\25\2\76\10\0\1\26\1\0\2\25"+ + "\1\0\1\76\10\0\1\25\117\0\1\25\3\0\1\25"+ + "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+ + "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+ + "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\76"+ + "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+ + "\1\0\2\25\114\0\1\25\3\0\1\25\5\0\1\25"+ + "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+ + "\2\76\5\0\1\25\2\0\1\25\1\76\4\0\1\25"+ + "\2\0\1\25\1\0\1\25\176\0\2\76\6\0\1\76"+ + "\151\0\1\76\3\0\1\76\2\0\1\76\3\0\1\76"+ + "\5\0\1\76\7\0\1\76\4\0\2\76\3\0\2\76"+ + "\1\0\1\76\4\0\1\76\1\0\1\76\2\0\2\76"+ + "\1\0\3\76\1\0\1\76\2\0\4\76\2\0\1\76"+ + "\301\0\1\317\32\201\1\320\12\201\174\0\61\202\1\0"+ + "\1\321\4\202\1\322\1\0\3\202\1\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\116\1\35\2\323\1\324\1\325\10\323"+ + "\1\35\1\326\5\323\6\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\1\327\2\323\1\35\1\323\1\330\6\323\4\35"+ + "\1\323\1\35\2\323\1\35\1\323\1\35\3\323\1\117"+ + "\12\120\1\64\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\116\3\35\1\323\1\35\1\323"+ + "\4\35\1\323\10\35\1\323\2\35\1\323\2\35\1\323"+ + "\1\117\12\120\1\64\1\114\1\121\1\114\1\0\1\114"+ + "\1\122\1\115\3\114\3\0\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\53\3\0\1\54\5\0\1\55\3\0"+ + "\1\56\11\0\1\57\2\0\1\60\16\0\1\61\2\0"+ + "\1\62\41\0\2\25\1\63\1\0\1\64\1\0\1\64"+ + "\1\65\1\0\1\25\2\0\1\116\1\35\1\323\1\331"+ + "\2\323\2\35\1\323\6\35\3\323\11\35\1\117\12\120"+ + "\1\64\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\116\3\35\1\323\1\35\1\323\10\35"+ + "\1\323\1\35\2\323\10\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\4\35\1\332\5\35\1\323\17\35\1\117\12\120"+ + "\1\64\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + 
"\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\116\4\35\2\323\2\35\1\323\1\35"+ + "\1\323\13\35\1\323\2\35\1\323\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\1\323\1\35\3\323\1\333\14\323\2\35"+ + "\2\323\2\35\1\323\1\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\2\35\4\323\3\35\2\323\1\334\1\323\1\35"+ + "\2\323\12\35\1\117\12\120\1\64\1\114\1\121\1\114"+ + "\1\0\1\114\1\122\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\53\3\0\1\54\5\0"+ + "\1\55\3\0\1\56\11\0\1\57\2\0\1\60\16\0"+ + "\1\61\2\0\1\62\41\0\2\25\1\63\1\0\1\64"+ + "\1\0\1\64\1\65\1\0\1\25\2\0\1\116\2\323"+ + "\2\35\1\323\3\35\1\323\5\35\3\323\3\35\1\323"+ + "\2\35\3\323\1\117\12\120\1\64\1\114\1\121\1\114"+ + "\1\0\1\114\1\122\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\53\3\0\1\54\5\0"+ + "\1\55\3\0\1\56\11\0\1\57\2\0\1\60\16\0"+ + "\1\61\2\0\1\62\41\0\2\25\1\63\1\0\1\64"+ + "\1\0\1\64\1\65\1\0\1\25\2\0\1\116\5\323"+ + "\1\335\1\35\1\323\1\336\7\323\1\337\3\323\1\35"+ + "\1\323\1\35\3\323\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\1\340\1\323\1\35\1\327\6\323\3\35\1\323\2\35"+ + "\1\323\2\35\1\323\6\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\1\323\31\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\1\323\2\35\1\323\1\341\1\35\2\323\1\35\3\323"+ + "\2\35\2\323\1\35\1\323\3\35\1\323\2\35\2\323"+ + "\1\117\12\120\1\64\1\114\1\121\1\114\1\0\1\114"+ + "\1\122\1\115\3\114\3\0\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\53\3\0\1\54\5\0\1\55\3\0"+ + "\1\56\11\0\1\57\2\0\1\60\16\0\1\61\2\0"+ + "\1\62\41\0\2\25\1\63\1\0\1\64\1\0\1\64"+ + "\1\65\1\0\1\25\2\0\1\116\6\323\1\35\5\323"+ + "\3\35\2\323\2\35\7\323\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\1\35\2\323\1\336\1\342\3\323\1\35\3\323"+ + "\1\35\1\323\1\35\1\323\1\35\1\323\1\35\1\323"+ + 
"\1\35\3\323\1\35\1\323\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\1\323\6\35\1\323\6\35\1\323\4\35\1\323"+ + "\4\35\2\323\1\117\12\120\1\64\1\114\1\121\1\114"+ + "\1\0\1\114\1\122\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\53\3\0\1\54\5\0"+ + "\1\55\3\0\1\56\11\0\1\57\2\0\1\60\16\0"+ + "\1\61\2\0\1\62\41\0\2\25\1\63\1\0\1\64"+ + "\1\0\1\64\1\65\1\0\1\25\2\0\1\116\6\35"+ + "\1\323\7\35\1\323\13\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\13\35\1\343\16\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\1\323\11\35\1\323\6\35\1\323\10\35\1\117"+ + "\12\120\1\64\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\116\1\323\1\35\6\323\1\344"+ + "\1\35\2\323\2\35\2\323\1\35\1\323\1\35\6\323"+ + "\1\35\1\117\12\120\1\64\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\116\4\35\1\323"+ + "\5\35\2\323\3\35\2\323\10\35\1\323\1\117\12\120"+ + "\1\64\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\116\3\35\1\323\1\35\1\345\4\35"+ + "\1\323\2\35\1\323\14\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\2\323\1\35\1\323\3\35\2\323\2\35\1\323"+ + "\4\35\1\323\11\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\3\35\1\323\13\35\1\323\12\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\3\35\2\323\2\35\2\323\1\35\2\323"+ + "\1\35\1\323\3\35\1\323\1\35\1\323\1\35\1\323"+ + 
"\2\35\1\323\1\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\32\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\14\0\1\310\3\0\1\311\5\0\1\312\3\0"+ + "\1\313\14\0\1\314\16\0\1\315\2\0\1\316\42\0"+ + "\1\150\1\63\6\0\1\150\2\0\1\113\1\244\1\245"+ + "\1\246\1\247\1\250\1\251\1\252\1\253\1\254\1\255"+ + "\1\256\1\257\1\260\1\261\1\262\1\263\1\264\1\265"+ + "\1\266\1\267\1\270\1\271\1\272\1\273\1\274\1\275"+ + "\1\114\12\120\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\1\140\3\0\2\114\14\0\1\310\3\0"+ + "\1\311\5\0\1\312\3\0\1\313\14\0\1\314\16\0"+ + "\1\315\2\0\1\316\42\0\1\150\1\63\6\0\1\150"+ + "\2\0\1\113\33\114\12\240\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\143\3\0\1\54\5\0\1\55\3\0\1\144"+ + "\11\0\1\57\2\0\1\145\16\0\1\146\2\0\1\147"+ + "\41\0\1\25\2\63\2\0\2\150\1\65\1\0\1\63"+ + "\2\0\1\347\32\133\1\114\12\240\1\0\1\114\1\121"+ + "\1\114\1\0\2\237\1\115\3\114\2\0\1\150\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\3\35\1\350\26\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\32\35\1\117\12\120\1\351\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\116\15\35\1\352"+ + "\14\35\1\117\12\120\1\64\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\1\235\2\353\1\354\1\355\10\353\1\235"+ + "\1\356\5\353\6\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\1\357\2\353\1\235\1\353\1\360\6\353\4\235"+ + "\1\353\1\235\2\353\1\235\1\353\1\235\3\353\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\3\235\1\353\1\235"+ + "\1\353\4\235\1\353\10\235\1\353\2\235\1\353\2\235"+ + "\1\353\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\235"+ + "\1\353\1\361\2\353\2\235\1\353\6\235\3\353\11\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\3\235\1\353"+ + "\1\235\1\353\10\235\1\353\1\235\2\353\10\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\4\235\1\362\5\235"+ + "\1\353\17\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\4\235\2\353\2\235\1\353\1\235\1\353\13\235\1\353"+ + "\2\235\1\353\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\353\1\235\3\353\1\363\14\353\2\235\2\353\2\235"+ + "\1\353\1\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\2\235\4\353\3\235\2\353\1\364\1\353\1\235\2\353"+ + "\12\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\2\353"+ + "\2\235\1\353\3\235\1\353\5\235\3\353\3\235\1\353"+ + "\2\235\3\353\1\117\12\235\1\0\3\114\1\0\2\114"+ + 
"\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\5\353\1\365\1\235\1\353\1\366\7\353\1\367\3\353"+ + "\1\235\1\353\1\235\3\353\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\1\370\1\353\1\235\1\357\6\353\3\235"+ + "\1\353\2\235\1\353\2\235\1\353\6\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\1\353\31\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\1\353\2\235\1\353\1\371"+ + "\1\235\2\353\1\235\3\353\2\235\2\353\1\235\1\353"+ + "\3\235\1\353\2\235\2\353\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\6\353\1\235\5\353\3\235\2\353\2\235"+ + "\7\353\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\235"+ + "\2\353\1\366\1\372\3\353\1\235\3\353\1\235\1\353"+ + "\1\235\1\353\1\235\1\353\1\235\1\353\1\235\3\353"+ + "\1\235\1\353\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\353\6\235\1\353\6\235\1\353\4\235\1\353\4\235"+ + "\2\353\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\6\235"+ + "\1\353\7\235\1\353\13\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\13\235\1\373\16\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\1\353\11\235\1\353\6\235\1\353"+ + "\10\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\353"+ + "\1\235\6\353\1\374\1\235\2\353\2\235\2\353\1\235"+ + "\1\353\1\235\6\353\1\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\4\235\1\353\5\235\2\353\3\235\2\353"+ + "\10\235\1\353\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\3\235\1\353\1\235\1\375\4\235\1\353\2\235\1\353"+ + "\14\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\2\353"+ + "\1\235\1\353\3\235\2\353\2\235\1\353\4\235\1\353"+ + "\11\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\3\235"+ + "\1\353\13\235\1\353\12\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\3\235\2\353\2\235\2\353\1\235\2\353"+ + "\1\235\1\353\3\235\1\353\1\235\1\353\1\235\1\353"+ + "\2\235\1\353\1\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\66\3\0\1\67\5\0\1\70\3\0"+ + "\1\71\11\0\1\57\2\0\1\72\16\0\1\73\2\0"+ + "\1\74\41\0\1\25\2\26\2\0\2\75\1\76\1\0"+ + "\1\26\2\0\1\376\32\35\1\117\12\277\1\0\1\114"+ + "\1\127\1\114\1\0\2\130\1\115\3\114\2\0\1\75"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\66\3\0"+ + "\1\67\5\0\1\70\3\0\1\71\11\0\1\57\2\0"+ + "\1\72\16\0\1\73\2\0\1\74\41\0\1\25\2\26"+ + "\2\0\2\75\1\76\1\0\1\26\2\0\1\376\32\35"+ + "\1\117\12\377\1\0\1\114\1\127\1\114\1\0\2\130"+ + "\1\115\3\114\2\0\1\75\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\66\3\0\1\67\5\0\1\70\3\0"+ + "\1\71\11\0\1\57\2\0\1\72\16\0\1\73\2\0"+ + "\1\74\41\0\1\25\2\26\2\0\2\75\1\76\1\0"+ + "\1\26\2\0\1\376\32\35\1\117\1\277\1\u0100\1\377"+ + "\2\277\2\377\2\277\1\377\1\0\1\114\1\127\1\114"+ + "\1\0\2\130\1\115\3\114\2\0\1\75\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\66\3\0\1\67\5\0"+ + "\1\70\3\0\1\71\11\0\1\57\2\0\1\72\16\0"+ + 
"\1\73\2\0\1\74\41\0\1\25\2\26\2\0\2\75"+ + "\1\76\1\0\1\26\2\0\1\u0101\32\35\1\117\12\301"+ + "\1\0\1\114\1\127\1\114\1\0\2\130\1\115\3\114"+ + "\2\0\1\75\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\151\3\0\1\152\5\0\1\153\3\0\1\154\11\0"+ + "\1\57\2\0\1\155\16\0\1\156\2\0\1\157\41\0"+ + "\1\25\1\64\7\0\1\64\2\0\1\113\32\133\13\114"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\1\140\3\0\2\114\14\0\1\165\3\0\1\166\5\0"+ + "\1\167\3\0\1\170\14\0\1\171\16\0\1\172\2\0"+ + "\1\173\42\0\1\75\1\26\6\0\1\75\2\0\1\113"+ + "\33\114\12\134\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\1\140\3\0\2\114\150\0\4\u0102\2\0"+ + "\1\u0102\15\0\1\u0102\6\0\12\u0102\1\305\174\0\4\u0103"+ + "\2\0\1\u0103\15\0\1\u0103\6\0\12\u0103\1\u0104\174\0"+ + "\4\u0105\2\0\1\u0105\15\0\1\u0105\6\0\1\u0106\2\u0107"+ + "\1\u0106\4\u0107\1\u0108\1\u0107\14\0\1\u0109\157\0\46\114"+ + "\1\0\3\114\1\0\2\114\1\0\3\114\3\0\1\114"+ + "\1\140\3\0\2\114\3\0\1\150\37\0\1\150\1\0"+ + "\2\150\16\0\1\150\4\0\1\150\2\0\2\150\10\0"+ + "\1\63\4\0\1\150\132\0\1\63\102\0\1\63\242\0"+ + "\2\63\227\0\1\150\246\0\2\150\11\0\1\150\210\0"+ + "\2\150\6\0\1\150\151\0\1\150\3\0\1\150\2\0"+ + "\1\150\3\0\1\150\5\0\1\150\7\0\1\150\4\0"+ + "\2\150\3\0\2\150\1\0\1\150\4\0\1\150\1\0"+ + "\1\150\2\0\2\150\1\0\3\150\1\0\1\150\2\0"+ + "\4\150\2\0\1\150\302\0\1\u010a\1\u010b\1\u010c\1\u010d"+ + "\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113\1\u0114\1\u0115"+ + "\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b\1\u011c\1\u011d"+ + "\1\u011e\1\u011f\1\u0120\1\u0121\1\u0122\1\u0123\1\0\12\201"+ + "\175\0\32\201\1\320\12\201\174\0\74\202\1\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\u0124\32\35\1\117\12\120\1\u0125"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\u0124\4\35\1\u0129\25\35\1\117"+ + "\12\120\1\u0125\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\u0124\15\35\1\217"+ + "\14\35\1\117\12\120\1\u0125\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\u0124"+ + "\10\35\1\217\21\35\1\117\12\120\1\u0125\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\1\u0126\1\u0127\1\u0128"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\u0124\17\35\1\323\12\35\1\117\12\120\1\u0125"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + 
"\1\0\1\25\2\0\1\u0124\5\35\1\u012a\4\35\1\323"+ + "\17\35\1\117\12\120\1\u0125\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\20\35\1\323\11\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\7\35\1\323\22\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\116"+ + "\27\35\1\323\2\35\1\117\12\120\1\64\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\u0124"+ + "\6\35\1\u0129\10\35\1\323\12\35\1\117\12\120\1\u0125"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\u0124\24\35\1\u012b\5\35\1\117"+ + "\12\120\1\u0125\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\116\11\35\1\323"+ + "\20\35\1\117\12\120\1\64\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\u0124\16\35\1\u012c"+ + "\13\35\1\117\12\120\1\u0125\1\114\1\121\1\114\1\0"+ + "\1\114\1\122\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\53\3\0\1\54"+ + "\5\0\1\55\3\0\1\56\11\0\1\57\2\0\1\60"+ + "\16\0\1\61\2\0\1\62\41\0\2\25\1\63\1\0"+ + "\1\64\1\0\1\64\1\65\1\0\1\25\2\0\1\u0124"+ + "\12\35\1\u012d\17\35\1\117\12\120\1\u0125\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\1\u0126\1\u0127\1\u0128"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\u0124\5\35\1\323\24\35\1\117\12\120\1\u0125"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\u0124\1\u012e\31\35\1\117\12\120"+ + "\1\u0125\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0"+ + 
"\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\53\3\0\1\54\5\0\1\55\3\0"+ + "\1\56\11\0\1\57\2\0\1\60\16\0\1\61\2\0"+ + "\1\62\41\0\2\25\1\63\1\0\1\64\1\0\1\64"+ + "\1\65\1\0\1\25\2\0\1\116\32\35\1\u012f\12\120"+ + "\1\64\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\u0124\23\35\1\323\6\35\1\117\12\120"+ + "\1\u0125\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114\2\0"+ + "\1\46\1\0\1\47\1\0\1\50\1\0\1\51\1\0"+ + "\1\52\1\0\1\53\3\0\1\54\5\0\1\55\3\0"+ + "\1\56\11\0\1\57\2\0\1\60\16\0\1\61\2\0"+ + "\1\62\41\0\2\25\1\63\1\0\1\64\1\0\1\64"+ + "\1\65\1\0\1\25\2\0\1\u0124\24\35\1\u0130\5\35"+ + "\1\117\12\120\1\u0125\1\114\1\121\1\114\1\0\1\114"+ + "\1\122\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0"+ + "\2\114\147\0\1\113\1\244\1\245\1\246\1\247\1\250"+ + "\1\251\1\252\1\253\1\254\1\255\1\256\1\257\1\260"+ + "\1\261\1\262\1\263\1\264\1\265\1\266\1\267\1\270"+ + "\1\271\1\272\1\273\1\274\1\275\1\114\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\1\140"+ + "\3\0\2\114\14\0\1\310\3\0\1\311\5\0\1\312"+ + "\3\0\1\313\14\0\1\314\16\0\1\315\2\0\1\316"+ + "\42\0\1\150\1\63\6\0\1\150\2\0\1\113\33\114"+ + "\12\240\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\1\140\3\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\32\35\1\117\12\120\1\u0131\1\114\1\121"+ + "\1\114\1\0\1\114\1\122\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\151\3\0\1\152"+ + "\5\0\1\153\3\0\1\154\11\0\1\57\2\0\1\155"+ + "\16\0\1\156\2\0\1\157\41\0\1\25\1\64\7\0"+ + "\1\64\3\0\32\25\24\0\1\u0132\15\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\53\3\0\1\54\5\0\1\55\3\0\1\56\11\0"+ + "\1\57\2\0\1\60\16\0\1\61\2\0\1\62\41\0"+ + "\2\25\1\63\1\0\1\64\1\0\1\64\1\65\1\0"+ + "\1\25\2\0\1\116\16\35\1\u0133\13\35\1\117\12\120"+ + "\1\u0134\1\114\1\121\1\114\1\0\1\114\1\122\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\u0135\32\235"+ + "\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0\1\u0135"+ + "\4\235\1\u0137\25\235\1\117\12\235\1\u0136\3\114\1\0"+ + "\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0"+ + "\2\114\147\0\1\u0135\15\235\1\260\14\235\1\117\12\235"+ + "\1\u0136\3\114\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128"+ + "\3\0\1\114\4\0\2\114\147\0\1\u0135\10\235\1\260"+ + "\21\235\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115"+ + "\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0"+ + "\1\u0135\17\235\1\353\12\235\1\117\12\235\1\u0136\3\114"+ + "\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114"+ + "\4\0\2\114\147\0\1\u0135\5\235\1\u0138\4\235\1\353"+ + "\17\235\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115"+ + "\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\20\235\1\353\11\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\7\235\1\353\22\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\27\235\1\353\2\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\u0135\6\235\1\u0137\10\235\1\353"+ + "\12\235\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115"+ + 
"\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0"+ + "\1\u0135\24\235\1\u0139\5\235\1\117\12\235\1\u0136\3\114"+ + "\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\11\235\1\353\20\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\u0135\16\235\1\u013a\13\235"+ + "\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0\1\u0135"+ + "\12\235\1\u013b\17\235\1\117\12\235\1\u0136\3\114\1\0"+ + "\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0"+ + "\2\114\147\0\1\u0135\5\235\1\353\24\235\1\117\12\235"+ + "\1\u0136\3\114\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128"+ + "\3\0\1\114\4\0\2\114\147\0\1\u0135\1\u013c\31\235"+ + "\1\117\12\235\1\u0136\3\114\1\0\2\114\1\115\1\u0126"+ + "\1\u0127\1\u0128\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\u012f\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\u0135\23\235"+ + "\1\353\6\235\1\117\12\235\1\u0136\3\114\1\0\2\114"+ + "\1\115\1\u0126\1\u0127\1\u0128\3\0\1\114\4\0\2\114"+ + "\147\0\1\u0135\24\235\1\u013d\5\235\1\117\12\235\1\u0136"+ + "\3\114\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0"+ + "\1\114\4\0\2\114\14\0\1\165\3\0\1\166\5\0"+ + "\1\167\3\0\1\170\14\0\1\171\16\0\1\172\2\0"+ + "\1\173\42\0\1\75\1\26\6\0\1\75\2\0\1\113"+ + "\1\244\1\245\1\246\1\247\1\250\1\251\1\252\1\253"+ + "\1\254\1\255\1\256\1\257\1\260\1\261\1\262\1\263"+ + "\1\264\1\265\1\266\1\267\1\270\1\271\1\272\1\273"+ + "\1\274\1\275\1\114\1\u013e\2\u013f\1\u013e\4\u013f\1\u0140"+ + "\1\u013f\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\1\140\3\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\66"+ + "\3\0\1\67\5\0\1\70\3\0\1\71\11\0\1\57"+ + "\2\0\1\72\16\0\1\73\2\0\1\74\41\0\1\25"+ + "\2\26\2\0\2\75\1\76\1\0\1\26\2\0\1\376"+ + "\32\35\1\117\12\301\1\0\1\114\1\127\1\114\1\0"+ + "\2\130\1\115\3\114\2\0\1\75\1\114\4\0\2\114"+ + "\2\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\66\3\0\1\67\5\0\1\70"+ + "\3\0\1\71\11\0\1\57\2\0\1\72\16\0\1\73"+ + "\2\0\1\74\41\0\1\25\2\26\2\0\2\75\1\76"+ + "\1\0\1\26\2\0\1\376\32\35\1\117\2\377\1\301"+ + "\2\377\2\301\2\377\1\301\1\0\1\114\1\127\1\114"+ + "\1\0\2\130\1\115\3\114\2\0\1\75\1\114\4\0"+ + "\2\114\14\0\1\165\3\0\1\166\5\0\1\167\3\0"+ + "\1\170\14\0\1\171\16\0\1\172\2\0\1\173\42\0"+ + "\1\75\1\26\6\0\1\75\2\0\1\113\1\244\1\245"+ + "\1\246\1\247\1\250\1\251\1\252\1\253\1\254\1\255"+ + "\1\256\1\257\1\260\1\261\1\262\1\263\1\264\1\265"+ + "\1\266\1\267\1\270\1\271\1\272\1\273\1\274\1\275"+ + "\1\114\12\301\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\1\140\3\0\2\114\150\0\4\u0141\2\0"+ + "\1\u0141\15\0\1\u0141\6\0\12\u0141\1\305\174\0\4\u0142"+ + "\2\0\1\u0142\15\0\1\u0142\6\0\12\u0142\1\u0143\174\0"+ + "\4\u0144\2\0\1\u0144\15\0\1\u0144\6\0\1\u0145\2\u0146"+ + "\1\u0145\4\u0146\1\u0147\1\u0146\14\0\1\u0109\160\0\4\u0148"+ + "\2\0\1\u0148\15\0\1\u0148\6\0\12\u0148\1\u0149\13\0"+ + "\1\u0109\157\0\1\u014a\4\u0148\2\0\1\u0148\15\0\1\u0148"+ + "\6\0\12\u014b\1\u0149\13\0\1\u0109\157\0\1\u014a\4\u0148"+ + "\2\0\1\u0148\15\0\1\u0148\6\0\12\u014c\1\u0149\13\0"+ + "\1\u0109\157\0\1\u014a\4\u0148\2\0\1\u0148\15\0\1\u0148"+ + "\6\0\1\u014b\1\u014d\1\u014c\2\u014b\2\u014c\2\u014b\1\u014c"+ + "\1\u0149\13\0\1\u0109\225\0\1\u0136\7\0\1\u014e\1\u014f"+ + "\1\u0150\161\0\1\317\1\201\2\u0151\1\u0152\1\u0153\10\u0151"+ + "\1\201\1\u0154\5\u0151\6\201\1\320\12\201\174\0\1\317"+ + "\1\u0155\2\u0151\1\201\1\u0151\1\u0156\6\u0151\4\201\1\u0151"+ + 
"\1\201\2\u0151\1\201\1\u0151\1\201\3\u0151\1\320\12\201"+ + "\174\0\1\317\3\201\1\u0151\1\201\1\u0151\4\201\1\u0151"+ + "\10\201\1\u0151\2\201\1\u0151\2\201\1\u0151\1\320\12\201"+ + "\174\0\1\317\1\201\1\u0151\1\u0157\2\u0151\2\201\1\u0151"+ + "\6\201\3\u0151\11\201\1\320\12\201\174\0\1\317\3\201"+ + "\1\u0151\1\201\1\u0151\10\201\1\u0151\1\201\2\u0151\10\201"+ + "\1\320\12\201\174\0\1\317\4\201\1\u0158\5\201\1\u0151"+ + "\17\201\1\320\12\201\174\0\1\317\4\201\2\u0151\2\201"+ + "\1\u0151\1\201\1\u0151\13\201\1\u0151\2\201\1\u0151\1\320"+ + "\12\201\174\0\1\317\1\u0151\1\201\3\u0151\1\u0159\14\u0151"+ + "\2\201\2\u0151\2\201\1\u0151\1\201\1\320\12\201\174\0"+ + "\1\317\2\201\4\u0151\3\201\2\u0151\1\u015a\1\u0151\1\201"+ + "\2\u0151\12\201\1\320\12\201\174\0\1\317\2\u0151\2\201"+ + "\1\u0151\3\201\1\u0151\5\201\3\u0151\3\201\1\u0151\2\201"+ + "\3\u0151\1\320\12\201\174\0\1\317\5\u0151\1\u015b\1\201"+ + "\1\u0151\1\u015c\7\u0151\1\u015d\3\u0151\1\201\1\u0151\1\201"+ + "\3\u0151\1\320\12\201\174\0\1\317\1\u015e\1\u0151\1\201"+ + "\1\u0155\6\u0151\3\201\1\u0151\2\201\1\u0151\2\201\1\u0151"+ + "\6\201\1\320\12\201\174\0\1\317\1\u0151\31\201\1\320"+ + "\12\201\174\0\1\317\1\u0151\2\201\1\u0151\1\u015f\1\201"+ + "\2\u0151\1\201\3\u0151\2\201\2\u0151\1\201\1\u0151\3\201"+ + "\1\u0151\2\201\2\u0151\1\320\12\201\174\0\1\317\6\u0151"+ + "\1\201\5\u0151\3\201\2\u0151\2\201\7\u0151\1\320\12\201"+ + "\174\0\1\317\1\201\2\u0151\1\u015c\1\u0160\3\u0151\1\201"+ + "\3\u0151\1\201\1\u0151\1\201\1\u0151\1\201\1\u0151\1\201"+ + "\1\u0151\1\201\3\u0151\1\201\1\u0151\1\320\12\201\174\0"+ + "\1\317\1\u0151\6\201\1\u0151\6\201\1\u0151\4\201\1\u0151"+ + "\4\201\2\u0151\1\320\12\201\174\0\1\317\6\201\1\u0151"+ + "\7\201\1\u0151\13\201\1\320\12\201\174\0\1\317\13\201"+ + "\1\u0161\16\201\1\320\12\201\174\0\1\317\1\u0151\11\201"+ + "\1\u0151\6\201\1\u0151\10\201\1\320\12\201\174\0\1\317"+ + "\1\u0151\1\201\6\u0151\1\u0162\1\201\2\u0151\2\201\2\u0151"+ + "\1\201\1\u0151\1\201\6\u0151\1\201\1\320\12\201\174\0"+ + "\1\317\4\201\1\u0151\5\201\2\u0151\3\201\2\u0151\10\201"+ + "\1\u0151\1\320\12\201\174\0\1\317\3\201\1\u0151\1\201"+ + "\1\u0163\4\201\1\u0151\2\201\1\u0151\14\201\1\320\12\201"+ + "\174\0\1\317\2\u0151\1\201\1\u0151\3\201\2\u0151\2\201"+ + "\1\u0151\4\201\1\u0151\11\201\1\320\12\201\174\0\1\317"+ + "\3\201\1\u0151\13\201\1\u0151\12\201\1\320\12\201\174\0"+ + "\1\317\3\201\2\u0151\2\201\2\u0151\1\201\2\u0151\1\201"+ + "\1\u0151\3\201\1\u0151\1\201\1\u0151\1\201\1\u0151\2\201"+ + "\1\u0151\1\201\1\320\12\201\27\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\151"+ + "\3\0\1\152\5\0\1\153\3\0\1\154\11\0\1\57"+ + "\2\0\1\155\16\0\1\156\2\0\1\157\41\0\1\25"+ + "\1\64\7\0\1\64\2\0\1\113\1\203\1\204\1\205"+ + "\1\206\1\207\1\210\1\211\1\212\1\213\1\214\1\215"+ + "\1\216\1\217\1\220\1\221\1\222\1\223\1\224\1\225"+ + "\1\226\1\227\1\230\1\231\1\232\1\233\1\234\1\114"+ + "\12\235\1\u0136\3\114\1\0\2\114\1\115\1\u0126\1\u0127"+ + "\1\u0128\3\0\1\114\1\140\3\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\151\3\0\1\152\5\0\1\153\3\0\1\154"+ + "\11\0\1\57\2\0\1\155\16\0\1\156\2\0\1\157"+ + "\41\0\1\25\1\64\7\0\1\64\3\0\32\25\1\0"+ + "\12\u0164\174\0\1\u0165\45\u0126\1\u014e\2\u0126\1\u0166\1\u014e"+ + "\2\u0126\1\u0167\2\u0126\1\u0128\2\0\1\u014e\1\u0126\4\0"+ + "\1\u0126\1\114\147\0\1\u0168\45\u0127\1\u014f\2\u0127\1\u0169"+ + "\1\0\2\114\1\u016a\1\u0126\1\u0127\1\u0128\2\0\1\u014f"+ + "\1\u0127\4\0\2\114\147\0\1\u016b\45\u0128\1\u0150\2\u0128"+ + 
"\1\u016c\1\u0150\2\u0128\1\u016d\2\u0128\1\114\2\0\1\u0150"+ + "\1\u0128\4\0\1\u0128\1\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\5\35\1\323\24\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\15\35\1\323\14\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\10\35\1\323\21\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\3\35\1\u016e\26\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\3\35\1\323\26\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\53"+ + "\3\0\1\54\5\0\1\55\3\0\1\56\11\0\1\57"+ + "\2\0\1\60\16\0\1\61\2\0\1\62\41\0\2\25"+ + "\1\63\1\0\1\64\1\0\1\64\1\65\1\0\1\25"+ + "\2\0\1\116\27\35\1\u016f\2\35\1\117\12\120\1\64"+ + "\1\114\1\121\1\114\1\0\1\114\1\122\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\113\32\235\1\u0170"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\16\35\1\323\13\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\151\3\0"+ + "\1\152\5\0\1\153\3\0\1\154\11\0\1\57\2\0"+ + "\1\155\16\0\1\156\2\0\1\157\41\0\1\25\1\64"+ + "\7\0\1\64\3\0\32\25\24\0\1\u0171\241\0\1\u0172"+ + "\15\0\1\46\1\0\1\47\1\0\1\50\1\0\1\51"+ + "\1\0\1\52\1\0\1\53\3\0\1\54\5\0\1\55"+ + "\3\0\1\56\11\0\1\57\2\0\1\60\16\0\1\61"+ + "\2\0\1\62\41\0\2\25\1\63\1\0\1\64\1\0"+ + "\1\64\1\65\1\0\1\25\2\0\1\116\32\35\1\117"+ + "\12\120\1\u0134\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\151\3\0\1\152\5\0\1\153\3\0\1\154"+ + "\11\0\1\57\2\0\1\155\16\0\1\156\2\0\1\157"+ + "\41\0\1\25\1\64\7\0\1\64\3\0\32\25\24\0"+ + "\1\u0173\162\0\1\113\1\244\1\245\1\246\1\247\1\250"+ + "\1\251\1\252\1\253\1\254\1\255\1\256\1\257\1\260"+ + "\1\261\1\262\1\263\1\264\1\265\1\266\1\267\1\270"+ + "\1\271\1\272\1\273\1\274\1\275\1\114\12\235\1\u0136"+ + "\3\114\1\0\2\114\1\115\1\u0126\1\u0127\1\u0128\3\0"+ + "\1\114\1\140\3\0\2\114\203\0\12\u0164\174\0\1\346"+ + "\5\235\1\353\24\235\1\117\12\235\1\0\3\114\1\0"+ + 
"\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\15\235\1\353\14\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\10\235\1\353\21\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\3\235\1\u0174\26\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\3\235\1\353\26\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\27\235\1\u0175\2\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\16\235\1\353"+ + "\13\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\66\3\0\1\67\5\0\1\70\3\0\1\71\11\0"+ + "\1\57\2\0\1\72\16\0\1\73\2\0\1\74\41\0"+ + "\1\25\2\26\2\0\2\75\1\76\1\0\1\26\2\0"+ + "\1\u0176\32\35\1\117\12\u013f\1\0\1\114\1\127\1\114"+ + "\1\0\2\130\1\115\3\114\2\0\1\75\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\66\3\0\1\67\5\0"+ + "\1\70\3\0\1\71\11\0\1\57\2\0\1\72\16\0"+ + "\1\73\2\0\1\74\41\0\1\25\2\26\2\0\2\75"+ + "\1\76\1\0\1\26\2\0\1\u0176\32\35\1\117\12\u0177"+ + "\1\0\1\114\1\127\1\114\1\0\2\130\1\115\3\114"+ + "\2\0\1\75\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\66\3\0\1\67\5\0\1\70\3\0\1\71\11\0"+ + "\1\57\2\0\1\72\16\0\1\73\2\0\1\74\41\0"+ + "\1\25\2\26\2\0\2\75\1\76\1\0\1\26\2\0"+ + "\1\u0176\32\35\1\117\1\u013f\1\u0178\1\u0177\2\u013f\2\u0177"+ + "\2\u013f\1\u0177\1\0\1\114\1\127\1\114\1\0\2\130"+ + "\1\115\3\114\2\0\1\75\1\114\4\0\2\114\215\0"+ + "\1\305\174\0\4\u0179\2\0\1\u0179\15\0\1\u0179\6\0"+ + "\12\u0179\1\u0143\174\0\4\u017a\2\0\1\u017a\15\0\1\u017a"+ + "\6\0\12\u017a\1\u017b\174\0\4\u017c\2\0\1\u017c\15\0"+ + "\1\u017c\6\0\12\u017c\1\u017d\13\0\1\u0109\157\0\1\u014a"+ + "\4\u017c\2\0\1\u017c\15\0\1\u017c\6\0\12\u017e\1\u017d"+ + "\13\0\1\u0109\157\0\1\u014a\4\u017c\2\0\1\u017c\15\0"+ + "\1\u017c\6\0\12\u017f\1\u017d\13\0\1\u0109\157\0\1\u014a"+ + "\4\u017c\2\0\1\u017c\15\0\1\u017c\6\0\1\u017e\1\u0180"+ + "\1\u017f\2\u017e\2\u017f\2\u017e\1\u017f\1\u017d\13\0\1\u0109"+ + "\160\0\4\u0181\2\0\1\u0181\15\0\1\u0181\6\0\12\u0181"+ + "\1\u0149\13\0\1\u0109\160\0\4\u0144\2\0\1\u0144\15\0"+ + "\1\u0144\6\0\1\u0145\2\u0146\1\u0145\4\u0146\1\u0147\1\u0146"+ + "\230\0\1\u0182\2\u0183\1\u0182\4\u0183\1\u0184\1\u0183\174\0"+ + "\1\u014a\4\u0181\2\0\1\u0181\15\0\1\u0181\6\0\12\u0185"+ + "\1\u0149\13\0\1\u0109\157\0\1\u014a\4\u0181\2\0\1\u0181"+ + "\15\0\1\u0181\6\0\12\u0181\1\u0149\13\0\1\u0109\157\0"+ + "\1\u014a\4\u0181\2\0\1\u0181\15\0\1\u0181\6\0\2\u0185"+ + "\1\u0181\2\u0185\2\u0181\2\u0185\1\u0181\1\u0149\13\0\1\u0109"+ + "\157\0\51\u014e\1\u0186\6\u014e\1\u0150\2\0\2\u014e\4\0"+ + "\1\u014e\150\0\51\u014f\1\u0187\3\0\1\u014f\1\u014e\1\u014f"+ + "\1\u0150\2\0\2\u014f\155\0\51\u0150\1\u0188\6\u0150\3\0"+ + "\2\u0150\4\0\1\u0150\150\0\1\u0189\32\201\1\320\12\201"+ + "\174\0\1\u0189\4\201\1\u018a\25\201\1\320\12\201\174\0"+ + "\1\u0189\15\201\1\u0116\14\201\1\320\12\201\174\0\1\u0189"+ + "\10\201\1\u0116\21\201\1\320\12\201\174\0\1\u0189\17\201"+ + "\1\u0151\12\201\1\320\12\201\174\0\1\u0189\5\201\1\u018b"+ + "\4\201\1\u0151\17\201\1\320\12\201\174\0\1\317\20\201"+ + "\1\u0151\11\201\1\320\12\201\174\0\1\317\7\201\1\u0151"+ + "\22\201\1\320\12\201\174\0\1\317\27\201\1\u0151\2\201"+ + "\1\320\12\201\174\0\1\u0189\6\201\1\u018a\10\201\1\u0151"+ + "\12\201\1\320\12\201\174\0\1\u0189\24\201\1\u018c\5\201"+ + 
"\1\320\12\201\174\0\1\317\11\201\1\u0151\20\201\1\320"+ + "\12\201\174\0\1\u0189\16\201\1\u018d\13\201\1\320\12\201"+ + "\174\0\1\u0189\12\201\1\u018e\17\201\1\320\12\201\174\0"+ + "\1\u0189\5\201\1\u0151\24\201\1\320\12\201\174\0\1\u0189"+ + "\1\u018f\31\201\1\320\12\201\174\0\1\317\32\201\1\u0190"+ + "\12\201\174\0\1\u0189\23\201\1\u0151\6\201\1\320\12\201"+ + "\174\0\1\u0189\24\201\1\u0191\5\201\1\320\12\201\230\0"+ + "\12\u0192\10\0\1\u014e\1\u014f\1\u0150\161\0\1\u0165\45\u0126"+ + "\1\u014e\2\u0126\1\u0166\1\u014e\2\u0126\1\u0167\2\u0126\1\u0128"+ + "\2\0\1\u014e\1\u0126\1\140\3\0\1\u0126\1\114\147\0"+ + "\1\113\4\u0193\2\114\1\u0193\15\114\1\u0193\6\114\12\u0193"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\51\u014e\1\u0186\6\u014e\1\u0150\1\202"+ + "\1\0\2\u014e\4\0\1\u014e\150\0\1\u0168\45\u0127\1\u014f"+ + "\2\u0127\1\u0169\1\0\2\114\1\u016a\1\u0126\1\u0127\1\u0128"+ + "\2\0\1\u014f\1\u0127\1\140\3\0\2\114\147\0\1\113"+ + "\4\u0194\2\114\1\u0194\15\114\1\u0194\6\114\12\u0194\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\51\u014f\1\u0187\3\0\1\u014f\1\u014e\1\u014f"+ + "\1\u0150\1\202\1\0\2\u014f\155\0\1\u016b\45\u0128\1\u0150"+ + "\2\u0128\1\u016c\1\u0150\2\u0128\1\u016d\2\u0128\1\114\2\0"+ + "\1\u0150\1\u0128\1\140\3\0\1\u0128\1\114\147\0\1\113"+ + "\4\u0195\2\114\1\u0195\15\114\1\u0195\6\114\12\u0195\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\51\u0150\1\u0188\6\u0150\1\0\1\202\1\0"+ + "\2\u0150\4\0\1\u0150\3\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\20\35\1\u0196\11\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\53\3\0"+ + "\1\54\5\0\1\55\3\0\1\56\11\0\1\57\2\0"+ + "\1\60\16\0\1\61\2\0\1\62\41\0\2\25\1\63"+ + "\1\0\1\64\1\0\1\64\1\65\1\0\1\25\2\0"+ + "\1\116\3\35\1\336\26\35\1\117\12\120\1\64\1\114"+ + "\1\121\1\114\1\0\1\114\1\122\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\113\2\235\1\u0197\2\235"+ + "\1\u0198\1\u0199\1\u019a\2\235\1\u019b\2\235\1\u019c\3\235"+ + "\1\u019d\1\u019e\1\u019f\1\235\1\u01a0\1\u01a1\1\235\1\u01a2"+ + "\1\u01a3\1\117\1\u01a4\2\235\1\u01a5\1\235\1\u01a6\1\u01a7"+ + "\3\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\226\0\1\u01a8\162\0\1\u01a9\32\u01aa"+ + "\1\u01a9\12\u01aa\1\u01ab\2\u01a9\1\u01ac\3\u01a9\1\u01ad\3\0"+ + "\1\u01ae\1\0\2\u01a9\4\0\1\u01a9\227\0\1\u01af\162\0"+ + "\1\346\20\235\1\u01b0\11\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\3\235\1\366\26\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\14\0\1\165\3\0\1\166\5\0\1\167\3\0"+ + "\1\170\14\0\1\171\16\0\1\172\2\0\1\173\42\0"+ + "\1\75\1\26\6\0\1\75\2\0\1\113\1\244\1\245"+ + "\1\246\1\247\1\250\1\251\1\252\1\253\1\254\1\255"+ + "\1\256\1\257\1\260\1\261\1\262\1\263\1\264\1\265"+ + "\1\266\1\267\1\270\1\271\1\272\1\273\1\274\1\275"+ + "\1\114\1\u01b1\2\u01b2\1\u01b1\4\u01b2\1\u01b3\1\u01b2\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\1\140"+ + "\3\0\2\114\2\0\1\46\1\0\1\47\1\0\1\50"+ + "\1\0\1\51\1\0\1\52\1\0\1\66\3\0\1\67"+ + "\5\0\1\70\3\0\1\71\11\0\1\57\2\0\1\72"+ + "\16\0\1\73\2\0\1\74\41\0\1\25\2\26\2\0"+ + "\2\75\1\76\1\0\1\26\2\0\1\u0176\32\35\1\117"+ + "\12\301\1\0\1\114\1\127\1\114\1\0\2\130\1\115"+ + 
"\3\114\2\0\1\75\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\66\3\0\1\67\5\0\1\70\3\0\1\71"+ + "\11\0\1\57\2\0\1\72\16\0\1\73\2\0\1\74"+ + "\41\0\1\25\2\26\2\0\2\75\1\76\1\0\1\26"+ + "\2\0\1\u0176\32\35\1\117\2\u0177\1\301\2\u0177\2\301"+ + "\2\u0177\1\301\1\0\1\114\1\127\1\114\1\0\2\130"+ + "\1\115\3\114\2\0\1\75\1\114\4\0\2\114\150\0"+ + "\4\u01b4\2\0\1\u01b4\15\0\1\u01b4\6\0\12\u01b4\1\u0143"+ + "\174\0\4\u01b5\2\0\1\u01b5\15\0\1\u01b5\6\0\12\u01b5"+ + "\1\u01b6\174\0\4\u01b7\2\0\1\u01b7\15\0\1\u01b7\6\0"+ + "\1\u01b8\2\u01b9\1\u01b8\4\u01b9\1\u01ba\1\u01b9\14\0\1\u0109"+ + "\160\0\4\u01bb\2\0\1\u01bb\15\0\1\u01bb\6\0\12\u01bb"+ + "\1\u017d\13\0\1\u0109\160\0\4\u01b7\2\0\1\u01b7\15\0"+ + "\1\u01b7\6\0\1\u01b8\2\u01b9\1\u01b8\4\u01b9\1\u01ba\1\u01b9"+ + "\174\0\1\u014a\4\u01bb\2\0\1\u01bb\15\0\1\u01bb\6\0"+ + "\12\u01bc\1\u017d\13\0\1\u0109\157\0\1\u014a\4\u01bb\2\0"+ + "\1\u01bb\15\0\1\u01bb\6\0\12\u01bb\1\u017d\13\0\1\u0109"+ + "\157\0\1\u014a\4\u01bb\2\0\1\u01bb\15\0\1\u01bb\6\0"+ + "\2\u01bc\1\u01bb\2\u01bc\2\u01bb\2\u01bc\1\u01bb\1\u017d\13\0"+ + "\1\u0109\160\0\4\u01bd\2\0\1\u01bd\15\0\1\u01bd\6\0"+ + "\12\u01bd\1\u0149\13\0\1\u0109\157\0\1\u01be\33\0\12\u0183"+ + "\174\0\1\u01be\33\0\12\u01bf\174\0\1\u01be\33\0\1\u0183"+ + "\1\u01c0\1\u01bf\2\u0183\2\u01bf\2\u0183\1\u01bf\174\0\1\u014a"+ + "\4\u01bd\2\0\1\u01bd\15\0\1\u01bd\6\0\12\u01bd\1\u0149"+ + "\13\0\1\u0109\160\0\4\u01c1\2\0\1\u01c1\15\0\1\u01c1"+ + "\6\0\12\u01c1\175\0\4\u01c2\2\0\1\u01c2\15\0\1\u01c2"+ + "\6\0\12\u01c2\175\0\4\u01c3\2\0\1\u01c3\15\0\1\u01c3"+ + "\6\0\12\u01c3\174\0\1\317\5\201\1\u0151\24\201\1\320"+ + "\12\201\174\0\1\317\15\201\1\u0151\14\201\1\320\12\201"+ + "\174\0\1\317\10\201\1\u0151\21\201\1\320\12\201\174\0"+ + "\1\317\3\201\1\u01c4\26\201\1\320\12\201\174\0\1\317"+ + "\3\201\1\u0151\26\201\1\320\12\201\174\0\1\317\27\201"+ + "\1\u01c5\2\201\1\320\12\201\175\0\32\201\1\u01c6\12\201"+ + "\174\0\1\317\16\201\1\u0151\13\201\1\320\12\201\230\0"+ + "\12\u01c7\10\0\1\u014e\1\u014f\1\u0150\161\0\1\113\4\u0126"+ + "\2\114\1\u0126\15\114\1\u0126\6\114\12\u0126\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\113\4\u0127\2\114\1\u0127\15\114\1\u0127\6\114"+ + "\12\u0127\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\113\4\u0128\2\114\1\u0128"+ + "\15\114\1\u0128\6\114\12\u0128\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\2\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\53\3\0\1\54\5\0\1\55\3\0\1\56"+ + "\11\0\1\57\2\0\1\60\16\0\1\61\2\0\1\62"+ + "\41\0\2\25\1\63\1\0\1\64\1\0\1\64\1\65"+ + "\1\0\1\25\2\0\1\116\12\35\1\323\17\35\1\117"+ + "\12\120\1\64\1\114\1\121\1\114\1\0\1\114\1\122"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\3\235\1\u01c8\26\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\4\235\1\u01c9\5\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\10\235\1\u01ca\12\235\1\u01cb\6\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\2\235"+ + "\1\u01cc\7\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\7\235\1\u01cd"+ + "\22\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\7\235"+ + "\1\u01ce\22\235\1\117\3\235\1\u01cf\6\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\7\235\1\u01d0\22\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + 
"\2\114\147\0\1\346\31\235\1\u01d1\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\1\235\1\u01d2\30\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\7\235\1\u01d3\1\235\1\u01d4"+ + "\20\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\22\235"+ + "\1\u01d5\7\235\1\117\2\235\1\u01d6\7\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\7\235\1\u01d7\22\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\7\235\1\u01d8\5\235\1\u01d9\14\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\23\235\1\u01da"+ + "\6\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\32\235"+ + "\1\117\3\235\1\u01db\6\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\17\235\1\u01dc\12\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\1\u01dd\11\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\150\0"+ + "\32\u01de\1\0\12\u01de\11\0\1\u01df\1\0\1\u01e0\160\0"+ + "\46\u01a9\1\u01ab\2\u01a9\1\u01ac\3\u01a9\1\u01ad\5\0\2\u01a9"+ + "\4\0\1\u01a9\150\0\1\u01e1\32\u01aa\1\u01e2\12\u01aa\1\u01e3"+ + "\2\u01a9\1\u01ac\3\u01a9\1\u01ad\1\0\1\u01e4\3\0\2\u01a9"+ + "\4\0\1\u01a9\150\0\46\u01ab\1\0\2\u01ab\1\u01e5\3\u01ab"+ + "\1\u01ad\5\0\2\u01ab\4\0\1\u01ab\151\0\4\u01e6\2\0"+ + "\1\u01e6\15\0\1\u01e6\6\0\12\u01e6\175\0\32\u01e7\1\0"+ + "\12\u01e7\13\0\1\u01ae\161\0\4\u01e8\2\0\1\u01e8\15\0"+ + "\1\u01e8\6\0\12\u01e8\1\u01e9\173\0\1\u01ea\32\u01eb\1\u01ea"+ + "\12\u01eb\1\u01ec\2\u01ea\1\u01ed\3\u01ea\1\u01ee\3\0\1\u01ef"+ + "\1\0\2\u01ea\4\0\1\u01ea\150\0\1\346\12\235\1\353"+ + "\17\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\2\0\1\46\1\0"+ + "\1\47\1\0\1\50\1\0\1\51\1\0\1\52\1\0"+ + "\1\66\3\0\1\67\5\0\1\70\3\0\1\71\11\0"+ + "\1\57\2\0\1\72\16\0\1\73\2\0\1\74\41\0"+ + "\1\25\2\26\2\0\2\75\1\76\1\0\1\26\2\0"+ + "\1\u0101\32\35\1\117\12\u01b2\1\u0136\1\114\1\127\1\114"+ + "\1\0\2\130\1\115\1\u0126\1\u0127\1\u0128\2\0\1\75"+ + "\1\114\4\0\2\114\2\0\1\46\1\0\1\47\1\0"+ + "\1\50\1\0\1\51\1\0\1\52\1\0\1\66\3\0"+ + "\1\67\5\0\1\70\3\0\1\71\11\0\1\57\2\0"+ + "\1\72\16\0\1\73\2\0\1\74\41\0\1\25\2\26"+ + "\2\0\2\75\1\76\1\0\1\26\2\0\1\u0101\32\35"+ + "\1\117\12\u01f0\1\u0136\1\114\1\127\1\114\1\0\2\130"+ + "\1\115\1\u0126\1\u0127\1\u0128\2\0\1\75\1\114\4\0"+ + "\2\114\2\0\1\46\1\0\1\47\1\0\1\50\1\0"+ + "\1\51\1\0\1\52\1\0\1\66\3\0\1\67\5\0"+ + "\1\70\3\0\1\71\11\0\1\57\2\0\1\72\16\0"+ + "\1\73\2\0\1\74\41\0\1\25\2\26\2\0\2\75"+ + "\1\76\1\0\1\26\2\0\1\u0101\32\35\1\117\1\u01b2"+ + "\1\u01f1\1\u01f0\2\u01b2\2\u01f0\2\u01b2\1\u01f0\1\u0136\1\114"+ + "\1\127\1\114\1\0\2\130\1\115\1\u0126\1\u0127\1\u0128"+ + "\2\0\1\75\1\114\4\0\2\114\215\0\1\u0143\174\0"+ + "\4\u01f2\2\0\1\u01f2\15\0\1\u01f2\6\0\12\u01f2\1\u01b6"+ + "\174\0\4\u01f3\2\0\1\u01f3\15\0\1\u01f3\6\0\12\u01f3"+ + "\1\u01f4\174\0\4\u01f5\2\0\1\u01f5\15\0\1\u01f5\6\0"+ + "\12\u01f5\1\u01f6\13\0\1\u0109\157\0\1\u014a\4\u01f5\2\0"+ + "\1\u01f5\15\0\1\u01f5\6\0\12\u01f7\1\u01f6\13\0\1\u0109"+ + "\157\0\1\u014a\4\u01f5\2\0\1\u01f5\15\0\1\u01f5\6\0"+ + "\12\u01f8\1\u01f6\13\0\1\u0109\157\0\1\u014a\4\u01f5\2\0"+ + "\1\u01f5\15\0\1\u01f5\6\0\1\u01f7\1\u01f9\1\u01f8\2\u01f7"+ + "\2\u01f8\2\u01f7\1\u01f8\1\u01f6\13\0\1\u0109\160\0\4\u01fa"+ + "\2\0\1\u01fa\15\0\1\u01fa\6\0\12\u01fa\1\u017d\13\0"+ + 
"\1\u0109\157\0\1\u014a\4\u01fa\2\0\1\u01fa\15\0\1\u01fa"+ + "\6\0\12\u01fa\1\u017d\13\0\1\u0109\225\0\1\u0149\13\0"+ + "\1\u0109\213\0\1\u01fb\2\u01fc\1\u01fb\4\u01fc\1\u01fd\1\u01fc"+ + "\174\0\1\u01be\241\0\1\u01be\33\0\2\u01bf\1\0\2\u01bf"+ + "\2\0\2\u01bf\176\0\4\u014e\2\0\1\u014e\15\0\1\u014e"+ + "\6\0\12\u014e\175\0\4\u014f\2\0\1\u014f\15\0\1\u014f"+ + "\6\0\12\u014f\175\0\4\u0150\2\0\1\u0150\15\0\1\u0150"+ + "\6\0\12\u0150\174\0\1\317\20\201\1\u01fe\11\201\1\320"+ + "\12\201\174\0\1\317\3\201\1\u015c\26\201\1\320\12\201"+ + "\175\0\2\201\1\u01ff\2\201\1\u0200\1\u0201\1\u0202\2\201"+ + "\1\u0203\2\201\1\u0204\3\201\1\u0205\1\u0206\1\u0207\1\201"+ + "\1\u0208\1\u0209\1\201\1\u020a\1\u020b\1\320\1\u020c\2\201"+ + "\1\u020d\1\201\1\u020e\1\u020f\3\201\230\0\12\u0210\10\0"+ + "\1\u014e\1\u014f\1\u0150\161\0\1\346\24\235\1\u0211\5\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\1\235\1\u0212"+ + "\30\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\14\235"+ + "\1\u0213\15\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\235\1\u0214\30\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\21\235\1\u0215\10\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\24\235\1\u0216\5\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\24\235\1\u0217\5\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\1\u0139\31\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\24\235\1\u0218\5\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\1\235\1\u0219\30\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\31\235\1\u021a"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\24\235\1\u021b"+ + "\5\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\235"+ + "\1\u021c\30\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\u021d\31\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\21\235\1\u021e\10\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\24\235\1\u021f\5\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\24\235\1\u0220\5\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\4\235\1\u0221\25\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\21\235\1\u0222\10\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\24\235\1\u0223\5\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\7\235\1\u0224\2\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\u0225"+ + "\31\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\u0226\32\u01de"+ + "\1\u0227\12\u01de\11\0\1\u01df\162\0\51\u01df\1\u0228\3\0"+ + "\3\u01df\1\u0150\3\0\1\u01df\156\0\4\u0229\2\0\1\u0229"+ + "\15\0\1\u0229\6\0\12\u0229\1\u022a\173\0\1\u01a9\32\u01aa"+ + "\1\u01a9\12\u01aa\1\u01ab\2\u01a9\1\u01ac\3\u01a9\1\u01ad\5\0"+ + 
"\2\u01a9\4\0\1\u01a9\150\0\1\u01a9\32\u01aa\1\u01e2\12\u01aa"+ + "\1\u01ab\2\u01a9\1\u01ac\3\u01a9\1\u01ad\5\0\2\u01a9\4\0"+ + "\1\u01a9\150\0\34\u01ab\12\u022b\1\0\2\u01ab\1\u01e5\3\u01ab"+ + "\1\u01ad\5\0\2\u01ab\4\0\1\u01ab\150\0\51\u01e4\1\u022c"+ + "\3\0\3\u01e4\1\u0150\2\0\1\u022d\1\u01e4\156\0\4\u022e"+ + "\2\0\1\u022e\15\0\1\u022e\6\0\12\u022e\175\0\4\u01a9"+ + "\2\0\1\u01a9\15\0\1\u01a9\6\0\12\u01a9\174\0\1\u022f"+ + "\32\u01e7\1\u0230\12\u01e7\1\u0231\10\0\1\u01e4\163\0\4\u0232"+ + "\2\0\1\u0232\15\0\1\u0232\6\0\12\u0232\1\u0233\241\0"+ + "\1\u0234\173\0\46\u01ea\1\u01ec\2\u01ea\1\u01ed\3\u01ea\1\u01ee"+ + "\5\0\2\u01ea\4\0\1\u01ea\150\0\1\u0235\32\u01eb\1\u0236"+ + "\12\u01eb\1\u0237\2\u01ea\1\u01ed\3\u01ea\1\u01ee\1\u014e\1\u014f"+ + "\1\u0150\2\0\2\u01ea\4\0\1\u01ea\150\0\46\u01ec\1\0"+ + "\2\u01ec\1\u0238\3\u01ec\1\u01ee\5\0\2\u01ec\4\0\1\u01ec"+ + "\151\0\4\u0239\2\0\1\u0239\15\0\1\u0239\6\0\12\u0239"+ + "\175\0\32\u023a\1\0\12\u023a\13\0\1\u01ef\13\0\1\46"+ + "\1\0\1\47\1\0\1\50\1\0\1\51\1\0\1\52"+ + "\1\0\1\66\3\0\1\67\5\0\1\70\3\0\1\71"+ + "\11\0\1\57\2\0\1\72\16\0\1\73\2\0\1\74"+ + "\41\0\1\25\2\26\2\0\2\75\1\76\1\0\1\26"+ + "\2\0\1\u0101\32\35\1\117\12\301\1\u0136\1\114\1\127"+ + "\1\114\1\0\2\130\1\115\1\u0126\1\u0127\1\u0128\2\0"+ + "\1\75\1\114\4\0\2\114\2\0\1\46\1\0\1\47"+ + "\1\0\1\50\1\0\1\51\1\0\1\52\1\0\1\66"+ + "\3\0\1\67\5\0\1\70\3\0\1\71\11\0\1\57"+ + "\2\0\1\72\16\0\1\73\2\0\1\74\41\0\1\25"+ + "\2\26\2\0\2\75\1\76\1\0\1\26\2\0\1\u0101"+ + "\32\35\1\117\2\u01f0\1\301\2\u01f0\2\301\2\u01f0\1\301"+ + "\1\u0136\1\114\1\127\1\114\1\0\2\130\1\115\1\u0126"+ + "\1\u0127\1\u0128\2\0\1\75\1\114\4\0\2\114\150\0"+ + "\4\u023b\2\0\1\u023b\15\0\1\u023b\6\0\12\u023b\1\u01b6"+ + "\174\0\4\u023c\2\0\1\u023c\15\0\1\u023c\6\0\12\u023c"+ + "\1\u023d\174\0\4\u023e\2\0\1\u023e\15\0\1\u023e\6\0"+ + "\1\u023f\2\u0240\1\u023f\4\u0240\1\u0241\1\u0240\14\0\1\u0109"+ + "\160\0\4\u0242\2\0\1\u0242\15\0\1\u0242\6\0\12\u0242"+ + "\1\u01f6\13\0\1\u0109\160\0\4\u023e\2\0\1\u023e\15\0"+ + "\1\u023e\6\0\1\u023f\2\u0240\1\u023f\4\u0240\1\u0241\1\u0240"+ + "\174\0\1\u014a\4\u0242\2\0\1\u0242\15\0\1\u0242\6\0"+ + "\12\u0243\1\u01f6\13\0\1\u0109\157\0\1\u014a\4\u0242\2\0"+ + "\1\u0242\15\0\1\u0242\6\0\12\u0242\1\u01f6\13\0\1\u0109"+ + "\157\0\1\u014a\4\u0242\2\0\1\u0242\15\0\1\u0242\6\0"+ + "\2\u0243\1\u0242\2\u0243\2\u0242\2\u0243\1\u0242\1\u01f6\13\0"+ + "\1\u0109\225\0\1\u017d\13\0\1\u0109\157\0\1\u0244\33\0"+ + "\12\u01fc\174\0\1\u0244\33\0\12\u0245\174\0\1\u0244\33\0"+ + "\1\u01fc\1\u0246\1\u0245\2\u01fc\2\u0245\2\u01fc\1\u0245\174\0"+ + "\1\317\12\201\1\u0151\17\201\1\320\12\201\174\0\1\317"+ + "\3\201\1\u0247\26\201\1\320\12\201\174\0\1\317\32\201"+ + "\1\320\4\201\1\u0248\5\201\174\0\1\317\10\201\1\u0249"+ + "\12\201\1\u024a\6\201\1\320\12\201\174\0\1\317\32\201"+ + "\1\320\2\201\1\u024b\7\201\174\0\1\317\7\201\1\u024c"+ + "\22\201\1\320\12\201\174\0\1\317\7\201\1\u024d\22\201"+ + "\1\320\3\201\1\u024e\6\201\174\0\1\317\7\201\1\u024f"+ + "\22\201\1\320\12\201\174\0\1\317\31\201\1\u0250\1\320"+ + "\12\201\174\0\1\317\1\201\1\u0251\30\201\1\320\12\201"+ + "\174\0\1\317\7\201\1\u0252\1\201\1\u0253\20\201\1\320"+ + "\12\201\174\0\1\317\22\201\1\u0254\7\201\1\320\2\201"+ + "\1\u0255\7\201\174\0\1\317\7\201\1\u0256\22\201\1\320"+ + "\12\201\174\0\1\317\7\201\1\u0257\5\201\1\u0258\14\201"+ + "\1\320\12\201\174\0\1\317\23\201\1\u0259\6\201\1\320"+ + "\12\201\174\0\1\317\32\201\1\320\3\201\1\u025a\6\201"+ + "\174\0\1\317\17\201\1\u025b\12\201\1\320\12\201\174\0"+ + 
"\1\317\32\201\1\320\1\u025c\11\201\230\0\12\u025d\10\0"+ + "\1\u014e\1\u014f\1\u0150\161\0\1\346\1\u025e\31\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\21\235\1\u025f\10\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\16\235\1\u0260"+ + "\4\235\1\u0261\6\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\10\235\1\u0262\1\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\10\235\1\u0263\1\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\1\u0264\2\235\1\u0265\26\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\16\235\1\u0266\13\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\11\235\1\u0267"+ + "\13\235\1\u0268\4\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\10\235\1\u0269\1\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\23\235\1\u026a\6\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\31\235\1\u026b\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\26\235\1\u026c\3\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\11\235\1\u026d\20\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\3\235"+ + "\1\u026e\6\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\10\235\1\u026f"+ + "\21\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\3\235"+ + "\1\u0270\26\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\21\235\1\u0271\6\235\1\u0272\1\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\12\235\1\u0273\17\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\32\235\1\117\1\235\1\u0274"+ + "\10\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\24\235\1\u0275\5\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\31\235\1\u0276"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\150\0\32\u01de\1\0\12\u01de"+ + "\175\0\32\u01de\1\u0227\12\u01de\175\0\4\u0277\2\0\1\u0277"+ + "\15\0\1\u0277\6\0\12\u0277\175\0\4\u0278\2\0\1\u0278"+ + "\15\0\1\u0278\6\0\12\u0278\1\u0279\241\0\1\u027a\173\0"+ + "\34\u01ab\12\u027b\1\0\2\u01ab\1\u01e5\3\u01ab\1\u01ad\1\0"+ + "\1\u01e4\3\0\2\u01ab\4\0\1\u01ab\151\0\4\u027c\2\0"+ + "\1\u027c\15\0\1\u027c\6\0\12\u027c\214\0\1\u027d\222\0"+ + "\4\u01ab\2\0\1\u01ab\15\0\1\u01ab\6\0\12\u01ab\175\0"+ + "\32\u01e7\1\0\12\u01e7\175\0\32\u01e7\1\u0230\12\u01e7\230\0"+ + "\12\u027e\175\0\4\u027f\2\0\1\u027f\15\0\1\u027f\6\0"+ + "\12\u027f\1\u0233\174\0\4\u0280\2\0\1\u0280\15\0\1\u0280"+ + "\6\0\12\u0280\1\u0281\174\0\4\u0282\2\0\1\u0282\15\0"+ + "\1\u0282\6\0\1\u0283\2\u0284\1\u0283\4\u0284\1\u0285\1\u0284"+ + "\14\0\1\u0286\157\0\1\u01ea\32\u01eb\1\u01ea\12\u01eb\1\u01ec"+ + "\2\u01ea\1\u01ed\3\u01ea\1\u01ee\5\0\2\u01ea\4\0\1\u01ea"+ + "\150\0\1\u01ea\32\u01eb\1\u0236\12\u01eb\1\u01ec\2\u01ea\1\u01ed"+ + "\3\u01ea\1\u01ee\5\0\2\u01ea\4\0\1\u01ea\150\0\34\u01ec"+ + "\12\u0287\1\0\2\u01ec\1\u0238\3\u01ec\1\u01ee\5\0\2\u01ec"+ + 
"\4\0\1\u01ec\151\0\4\u0288\2\0\1\u0288\15\0\1\u0288"+ + "\6\0\12\u0288\175\0\4\u01ea\2\0\1\u01ea\15\0\1\u01ea"+ + "\6\0\12\u01ea\174\0\1\u0289\32\u023a\1\u028a\12\u023a\1\u0136"+ + "\7\0\1\u014e\1\u014f\1\u0150\227\0\1\u01b6\174\0\4\u028b"+ + "\2\0\1\u028b\15\0\1\u028b\6\0\12\u028b\1\u023d\174\0"+ + "\4\u028c\2\0\1\u028c\15\0\1\u028c\6\0\12\u028c\1\u028d"+ + "\174\0\4\u028e\2\0\1\u028e\15\0\1\u028e\6\0\12\u028e"+ + "\1\u028f\13\0\1\u0109\157\0\1\u014a\4\u028e\2\0\1\u028e"+ + "\15\0\1\u028e\6\0\12\u0290\1\u028f\13\0\1\u0109\157\0"+ + "\1\u014a\4\u028e\2\0\1\u028e\15\0\1\u028e\6\0\12\u0291"+ + "\1\u028f\13\0\1\u0109\157\0\1\u014a\4\u028e\2\0\1\u028e"+ + "\15\0\1\u028e\6\0\1\u0290\1\u0292\1\u0291\2\u0290\2\u0291"+ + "\2\u0290\1\u0291\1\u028f\13\0\1\u0109\160\0\4\u0293\2\0"+ + "\1\u0293\15\0\1\u0293\6\0\12\u0293\1\u01f6\13\0\1\u0109"+ + "\157\0\1\u014a\4\u0293\2\0\1\u0293\15\0\1\u0293\6\0"+ + "\12\u0293\1\u01f6\13\0\1\u0109\213\0\1\u0294\2\u0295\1\u0294"+ + "\4\u0295\1\u0296\1\u0295\174\0\1\u0244\241\0\1\u0244\33\0"+ + "\2\u0245\1\0\2\u0245\2\0\2\u0245\175\0\1\317\24\201"+ + "\1\u0297\5\201\1\320\12\201\174\0\1\317\1\201\1\u0298"+ + "\30\201\1\320\12\201\174\0\1\317\14\201\1\u0299\15\201"+ + "\1\320\12\201\174\0\1\317\1\201\1\u029a\30\201\1\320"+ + "\12\201\174\0\1\317\21\201\1\u029b\10\201\1\320\12\201"+ + "\174\0\1\317\24\201\1\u029c\5\201\1\320\12\201\174\0"+ + "\1\317\24\201\1\u029d\5\201\1\320\12\201\174\0\1\317"+ + "\1\u018c\31\201\1\320\12\201\174\0\1\317\24\201\1\u029e"+ + "\5\201\1\320\12\201\174\0\1\317\1\201\1\u029f\30\201"+ + "\1\320\12\201\174\0\1\317\31\201\1\u02a0\1\320\12\201"+ + "\174\0\1\317\24\201\1\u02a1\5\201\1\320\12\201\174\0"+ + "\1\317\1\201\1\u02a2\30\201\1\320\12\201\174\0\1\317"+ + "\1\u02a3\31\201\1\320\12\201\174\0\1\317\21\201\1\u02a4"+ + "\10\201\1\320\12\201\174\0\1\317\24\201\1\u02a5\5\201"+ + "\1\320\12\201\174\0\1\317\24\201\1\u02a6\5\201\1\320"+ + "\12\201\174\0\1\317\4\201\1\u02a7\25\201\1\320\12\201"+ + "\174\0\1\317\21\201\1\u02a8\10\201\1\320\12\201\174\0"+ + "\1\317\24\201\1\u02a9\5\201\1\320\12\201\174\0\1\317"+ + "\32\201\1\320\7\201\1\u02aa\2\201\174\0\1\317\1\u02ab"+ + "\31\201\1\320\12\201\252\0\1\u014e\1\u014f\1\u0150\161\0"+ + "\1\346\32\235\1\117\1\u02ac\11\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\7\235\1\u02ad\2\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\6\235\1\u013d\3\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\32\235\1\117\5\235\1\u013d\4\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\1\235\1\u02ae\30\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\1\235"+ + "\1\u02af\10\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\1\u02b0\27\235"+ + "\1\u02b1\1\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\4\235\1\u02b2\25\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\1\u02b3\11\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\2\235\1\260\7\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\3\235\1\u02b4\6\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\1\u02b5\31\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + 
"\2\114\147\0\1\346\1\u02ad\31\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\32\235\1\117\2\235\1\u02b6\7\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\32\235\1\117\2\235\1\u02b7"+ + "\7\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\15\235\1\u02b8\14\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\5\235\1\u02b9\4\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\32\235"+ + "\1\117\10\235\1\u02ba\1\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\235\1\u02bb\30\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\3\235\1\u02bc\6\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\1\235\1\u02bd\10\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\32\235\1\117\1\235\1\u02be\10\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\24\235\1\u02bf\5\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\3\235"+ + "\1\u02c0\6\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\25\235\1\u02c1"+ + "\4\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\150\0\4\u01df\2\0"+ + "\1\u01df\15\0\1\u01df\6\0\12\u01df\175\0\4\u02c2\2\0"+ + "\1\u02c2\15\0\1\u02c2\6\0\12\u02c2\1\u0279\174\0\4\u02c3"+ + "\2\0\1\u02c3\15\0\1\u02c3\6\0\12\u02c3\1\u02c4\174\0"+ + "\4\u02c5\2\0\1\u02c5\15\0\1\u02c5\6\0\1\u02c6\2\u02c7"+ + "\1\u02c6\4\u02c7\1\u02c8\1\u02c7\14\0\1\u02c9\157\0\34\u01ab"+ + "\12\u02ca\1\0\2\u01ab\1\u01e5\3\u01ab\1\u01ad\1\0\1\u01e4"+ + "\3\0\2\u01ab\4\0\1\u01ab\151\0\4\u01e4\2\0\1\u01e4"+ + "\15\0\1\u01e4\6\0\12\u01e4\225\0\1\u02cb\244\0\12\u02cc"+ + "\11\0\1\u01e4\163\0\4\u02cd\2\0\1\u02cd\15\0\1\u02cd"+ + "\6\0\12\u02cd\1\u0233\174\0\4\u02ce\2\0\1\u02ce\15\0"+ + "\1\u02ce\6\0\12\u02ce\1\u02cf\174\0\4\u02d0\2\0\1\u02d0"+ + "\15\0\1\u02d0\6\0\1\u02d1\2\u02d2\1\u02d1\4\u02d2\1\u02d3"+ + "\1\u02d2\14\0\1\u0286\160\0\4\u02d4\2\0\1\u02d4\15\0"+ + "\1\u02d4\6\0\12\u02d4\1\u02d5\13\0\1\u0286\157\0\1\u02d6"+ + "\4\u02d4\2\0\1\u02d4\15\0\1\u02d4\6\0\12\u02d7\1\u02d5"+ + "\13\0\1\u0286\157\0\1\u02d6\4\u02d4\2\0\1\u02d4\15\0"+ + "\1\u02d4\6\0\12\u02d8\1\u02d5\13\0\1\u0286\157\0\1\u02d6"+ + "\4\u02d4\2\0\1\u02d4\15\0\1\u02d4\6\0\1\u02d7\1\u02d9"+ + "\1\u02d8\2\u02d7\2\u02d8\2\u02d7\1\u02d8\1\u02d5\13\0\1\u0286"+ + "\225\0\1\u0231\10\0\1\u01e4\162\0\34\u01ec\12\u02da\1\0"+ + "\2\u01ec\1\u0238\3\u01ec\1\u01ee\1\u014e\1\u014f\1\u0150\2\0"+ + "\2\u01ec\4\0\1\u01ec\151\0\4\u01ec\2\0\1\u01ec\15\0"+ + "\1\u01ec\6\0\12\u01ec\175\0\32\u023a\1\0\12\u023a\175\0"+ + "\32\u023a\1\u028a\12\u023a\175\0\4\u02db\2\0\1\u02db\15\0"+ + "\1\u02db\6\0\12\u02db\1\u023d\174\0\4\u02dc\2\0\1\u02dc"+ + "\15\0\1\u02dc\6\0\12\u02dc\1\u02dd\174\0\4\u02de\2\0"+ + "\1\u02de\15\0\1\u02de\6\0\1\u02df\2\u02e0\1\u02df\4\u02e0"+ + "\1\u02e1\1\u02e0\14\0\1\u0109\160\0\4\u02e2\2\0\1\u02e2"+ + "\15\0\1\u02e2\6\0\12\u02e2\1\u028f\13\0\1\u0109\160\0"+ + "\4\u02de\2\0\1\u02de\15\0\1\u02de\6\0\1\u02df\2\u02e0"+ + "\1\u02df\4\u02e0\1\u02e1\1\u02e0\174\0\1\u014a\4\u02e2\2\0"+ + "\1\u02e2\15\0\1\u02e2\6\0\12\u02e3\1\u028f\13\0\1\u0109"+ + "\157\0\1\u014a\4\u02e2\2\0\1\u02e2\15\0\1\u02e2\6\0"+ + "\12\u02e2\1\u028f\13\0\1\u0109\157\0\1\u014a\4\u02e2\2\0"+ + 
"\1\u02e2\15\0\1\u02e2\6\0\2\u02e3\1\u02e2\2\u02e3\2\u02e2"+ + "\2\u02e3\1\u02e2\1\u028f\13\0\1\u0109\225\0\1\u01f6\13\0"+ + "\1\u0109\213\0\12\u0295\14\0\1\u0109\213\0\12\u02e4\14\0"+ + "\1\u0109\213\0\1\u0295\1\u02e5\1\u02e4\2\u0295\2\u02e4\2\u0295"+ + "\1\u02e4\14\0\1\u0109\157\0\1\317\1\u02e6\31\201\1\320"+ + "\12\201\174\0\1\317\21\201\1\u02e7\10\201\1\320\12\201"+ + "\174\0\1\317\16\201\1\u02e8\4\201\1\u02e9\6\201\1\320"+ + "\12\201\174\0\1\317\32\201\1\320\10\201\1\u02ea\1\201"+ + "\174\0\1\317\32\201\1\320\10\201\1\u02eb\1\201\174\0"+ + "\1\317\1\u02ec\2\201\1\u02ed\26\201\1\320\12\201\174\0"+ + "\1\317\16\201\1\u02ee\13\201\1\320\12\201\174\0\1\317"+ + "\11\201\1\u02ef\13\201\1\u02f0\4\201\1\320\12\201\174\0"+ + "\1\317\32\201\1\320\10\201\1\u02f1\1\201\174\0\1\317"+ + "\23\201\1\u02f2\6\201\1\320\12\201\174\0\1\317\31\201"+ + "\1\u02f3\1\320\12\201\174\0\1\317\26\201\1\u02f4\3\201"+ + "\1\320\12\201\174\0\1\317\11\201\1\u02f5\20\201\1\320"+ + "\12\201\174\0\1\317\32\201\1\320\3\201\1\u02f6\6\201"+ + "\174\0\1\317\10\201\1\u02f7\21\201\1\320\12\201\174\0"+ + "\1\317\3\201\1\u02f8\26\201\1\320\12\201\174\0\1\317"+ + "\21\201\1\u02f9\6\201\1\u02fa\1\201\1\320\12\201\174\0"+ + "\1\317\12\201\1\u02fb\17\201\1\320\12\201\174\0\1\317"+ + "\32\201\1\320\1\201\1\u02fc\10\201\174\0\1\317\24\201"+ + "\1\u02fd\5\201\1\320\12\201\174\0\1\317\31\201\1\u02fe"+ + "\1\320\12\201\174\0\1\346\1\u02ff\31\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\25\235\1\353\4\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\5\235"+ + "\1\u0300\4\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\3\235\1\u02ff\6\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\12\235"+ + "\1\u0301\17\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\25\235\1\u0302\4\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\15\235\1\u0303\14\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\2\235\1\u02ad\27\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\1\235\1\353\30\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\11\235\1\u0304\20\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\1\u0305\31\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\1\u0306\31\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\2\235\1\u0307\27\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\4\235\1\362\5\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\u0308"+ + "\31\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\25\235"+ + "\1\u0309\4\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\117\4\235\1\u02ff\5\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\11\235\1\u02ff\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\2\235\1\u02ff\7\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\16\235\1\u030a\13\235\1\117\12\235\1\0"+ + 
"\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\32\235\1\117\3\235\1\u030b\6\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\24\235\1\u030c\5\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\150\0\4\u030d\2\0\1\u030d\15\0"+ + "\1\u030d\6\0\12\u030d\1\u0279\174\0\4\u030e\2\0\1\u030e"+ + "\15\0\1\u030e\6\0\12\u030e\1\u030f\174\0\4\u0310\2\0"+ + "\1\u0310\15\0\1\u0310\6\0\1\u0311\2\u0312\1\u0311\4\u0312"+ + "\1\u0313\1\u0312\14\0\1\u02c9\160\0\4\u0314\2\0\1\u0314"+ + "\15\0\1\u0314\6\0\12\u0314\1\u0315\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u0314\2\0\1\u0314\15\0\1\u0314\6\0\12\u0317"+ + "\1\u0315\13\0\1\u02c9\157\0\1\u0316\4\u0314\2\0\1\u0314"+ + "\15\0\1\u0314\6\0\12\u0318\1\u0315\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u0314\2\0\1\u0314\15\0\1\u0314\6\0\1\u0317"+ + "\1\u0319\1\u0318\2\u0317\2\u0318\2\u0317\1\u0318\1\u0315\13\0"+ + "\1\u02c9\236\0\1\u01df\162\0\34\u01ab\12\u031a\1\0\2\u01ab"+ + "\1\u01e5\3\u01ab\1\u01ad\1\0\1\u01e4\3\0\2\u01ab\4\0"+ + "\1\u01ab\166\0\1\u031b\257\0\12\u031c\11\0\1\u01e4\230\0"+ + "\1\u0233\174\0\4\u031d\2\0\1\u031d\15\0\1\u031d\6\0"+ + "\12\u031d\1\u02cf\174\0\4\u031e\2\0\1\u031e\15\0\1\u031e"+ + "\6\0\12\u031e\1\u031f\174\0\4\u0320\2\0\1\u0320\15\0"+ + "\1\u0320\6\0\12\u0320\1\u0321\13\0\1\u0286\157\0\1\u02d6"+ + "\4\u0320\2\0\1\u0320\15\0\1\u0320\6\0\12\u0322\1\u0321"+ + "\13\0\1\u0286\157\0\1\u02d6\4\u0320\2\0\1\u0320\15\0"+ + "\1\u0320\6\0\12\u0323\1\u0321\13\0\1\u0286\157\0\1\u02d6"+ + "\4\u0320\2\0\1\u0320\15\0\1\u0320\6\0\1\u0322\1\u0324"+ + "\1\u0323\2\u0322\2\u0323\2\u0322\1\u0323\1\u0321\13\0\1\u0286"+ + "\160\0\4\u0325\2\0\1\u0325\15\0\1\u0325\6\0\12\u0325"+ + "\1\u02d5\13\0\1\u0286\160\0\4\u02d0\2\0\1\u02d0\15\0"+ + "\1\u02d0\6\0\1\u02d1\2\u02d2\1\u02d1\4\u02d2\1\u02d3\1\u02d2"+ + "\230\0\1\u0326\2\u0327\1\u0326\4\u0327\1\u0328\1\u0327\174\0"+ + "\1\u02d6\4\u0325\2\0\1\u0325\15\0\1\u0325\6\0\12\u0329"+ + "\1\u02d5\13\0\1\u0286\157\0\1\u02d6\4\u0325\2\0\1\u0325"+ + "\15\0\1\u0325\6\0\12\u0325\1\u02d5\13\0\1\u0286\157\0"+ + "\1\u02d6\4\u0325\2\0\1\u0325\15\0\1\u0325\6\0\2\u0329"+ + "\1\u0325\2\u0329\2\u0325\2\u0329\1\u0325\1\u02d5\13\0\1\u0286"+ + "\157\0\34\u01ec\12\u032a\1\0\2\u01ec\1\u0238\3\u01ec\1\u01ee"+ + "\1\u014e\1\u014f\1\u0150\2\0\2\u01ec\4\0\1\u01ec\216\0"+ + "\1\u023d\174\0\4\u032b\2\0\1\u032b\15\0\1\u032b\6\0"+ + "\12\u032b\1\u02dd\174\0\4\u032c\2\0\1\u032c\15\0\1\u032c"+ + "\6\0\12\u032c\1\u032d\174\0\4\u032e\2\0\1\u032e\15\0"+ + "\1\u032e\6\0\12\u032e\1\u032f\13\0\1\u0109\157\0\1\u014a"+ + "\4\u032e\2\0\1\u032e\15\0\1\u032e\6\0\12\u0330\1\u032f"+ + "\13\0\1\u0109\157\0\1\u014a\4\u032e\2\0\1\u032e\15\0"+ + "\1\u032e\6\0\12\u0331\1\u032f\13\0\1\u0109\157\0\1\u014a"+ + "\4\u032e\2\0\1\u032e\15\0\1\u032e\6\0\1\u0330\1\u0332"+ + "\1\u0331\2\u0330\2\u0331\2\u0330\1\u0331\1\u032f\13\0\1\u0109"+ + "\160\0\4\u0333\2\0\1\u0333\15\0\1\u0333\6\0\12\u0333"+ + "\1\u028f\13\0\1\u0109\157\0\1\u014a\4\u0333\2\0\1\u0333"+ + "\15\0\1\u0333\6\0\12\u0333\1\u028f\13\0\1\u0109\241\0"+ + "\1\u0109\213\0\2\u02e4\1\0\2\u02e4\2\0\2\u02e4\15\0"+ + "\1\u0109\157\0\1\317\32\201\1\320\1\u0334\11\201\174\0"+ + "\1\317\32\201\1\320\7\201\1\u0335\2\201\174\0\1\317"+ + "\32\201\1\320\6\201\1\u0191\3\201\174\0\1\317\32\201"+ + "\1\320\5\201\1\u0191\4\201\174\0\1\317\1\201\1\u0336"+ + "\30\201\1\320\12\201\174\0\1\317\32\201\1\320\1\201"+ + "\1\u0337\10\201\174\0\1\317\1\u0338\27\201\1\u0339\1\201"+ + "\1\320\12\201\174\0\1\317\4\201\1\u033a\25\201\1\320"+ + 
"\12\201\174\0\1\317\32\201\1\320\1\u033b\11\201\174\0"+ + "\1\317\32\201\1\320\2\201\1\u0116\7\201\174\0\1\317"+ + "\32\201\1\320\3\201\1\u033c\6\201\174\0\1\317\1\u033d"+ + "\31\201\1\320\12\201\174\0\1\317\1\u0335\31\201\1\320"+ + "\12\201\174\0\1\317\32\201\1\320\2\201\1\u033e\7\201"+ + "\174\0\1\317\32\201\1\320\2\201\1\u033f\7\201\174\0"+ + "\1\317\15\201\1\u0340\14\201\1\320\12\201\174\0\1\317"+ + "\32\201\1\320\5\201\1\u0341\4\201\174\0\1\317\32\201"+ + "\1\320\10\201\1\u0342\1\201\174\0\1\317\1\201\1\u0343"+ + "\30\201\1\320\12\201\174\0\1\317\32\201\1\320\3\201"+ + "\1\u0344\6\201\174\0\1\317\32\201\1\320\1\201\1\u0345"+ + "\10\201\174\0\1\317\32\201\1\320\1\201\1\u0346\10\201"+ + "\174\0\1\317\24\201\1\u0347\5\201\1\320\12\201\174\0"+ + "\1\317\32\201\1\320\3\201\1\u0348\6\201\174\0\1\317"+ + "\25\201\1\u0349\4\201\1\320\12\201\174\0\1\346\2\235"+ + "\1\353\27\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\3\235\1\u034a\26\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\11\235\1\u034b\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\11\235\1\u034c\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\7\235\1\u034d\2\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\4\235\1\u034e\5\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\26\235\1\u034f\3\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\30\235\1\u0350\1\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\11\235\1\u0138\20\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\12\235\1\u0351"+ + "\17\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\17\235"+ + "\1\363\12\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\117\4\235\1\u0352\5\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\30\235\1\u0353\1\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\30\235\1\u0354\1\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\215\0\1\u0279\174\0\4\u0355\2\0\1\u0355\15\0"+ + "\1\u0355\6\0\12\u0355\1\u030f\174\0\4\u0356\2\0\1\u0356"+ + "\15\0\1\u0356\6\0\12\u0356\1\u0357\174\0\4\u0358\2\0"+ + "\1\u0358\15\0\1\u0358\6\0\12\u0358\1\u0359\13\0\1\u02c9"+ + "\157\0\1\u0316\4\u0358\2\0\1\u0358\15\0\1\u0358\6\0"+ + "\12\u035a\1\u0359\13\0\1\u02c9\157\0\1\u0316\4\u0358\2\0"+ + "\1\u0358\15\0\1\u0358\6\0\12\u035b\1\u0359\13\0\1\u02c9"+ + "\157\0\1\u0316\4\u0358\2\0\1\u0358\15\0\1\u0358\6\0"+ + "\1\u035a\1\u035c\1\u035b\2\u035a\2\u035b\2\u035a\1\u035b\1\u0359"+ + "\13\0\1\u02c9\160\0\4\u035d\2\0\1\u035d\15\0\1\u035d"+ + "\6\0\12\u035d\1\u0315\13\0\1\u02c9\160\0\4\u0310\2\0"+ + "\1\u0310\15\0\1\u0310\6\0\1\u0311\2\u0312\1\u0311\4\u0312"+ + "\1\u0313\1\u0312\230\0\1\u035e\2\u035f\1\u035e\4\u035f\1\u0360"+ + "\1\u035f\174\0\1\u0316\4\u035d\2\0\1\u035d\15\0\1\u035d"+ + "\6\0\12\u0361\1\u0315\13\0\1\u02c9\157\0\1\u0316\4\u035d"+ + "\2\0\1\u035d\15\0\1\u035d\6\0\12\u035d\1\u0315\13\0"+ + "\1\u02c9\157\0\1\u0316\4\u035d\2\0\1\u035d\15\0\1\u035d"+ + "\6\0\2\u0361\1\u035d\2\u0361\2\u035d\2\u0361\1\u035d\1\u0315"+ + "\13\0\1\u02c9\157\0\34\u01ab\12\u0362\1\0\2\u01ab\1\u01e5"+ + 
"\3\u01ab\1\u01ad\1\0\1\u01e4\3\0\2\u01ab\4\0\1\u01ab"+ + "\154\0\1\u0363\271\0\12\u0364\11\0\1\u01e4\163\0\4\u0365"+ + "\2\0\1\u0365\15\0\1\u0365\6\0\12\u0365\1\u02cf\174\0"+ + "\4\u0366\2\0\1\u0366\15\0\1\u0366\6\0\12\u0366\1\u0367"+ + "\174\0\4\u0368\2\0\1\u0368\15\0\1\u0368\6\0\1\u0369"+ + "\2\u036a\1\u0369\4\u036a\1\u036b\1\u036a\14\0\1\u0286\160\0"+ + "\4\u036c\2\0\1\u036c\15\0\1\u036c\6\0\12\u036c\1\u0321"+ + "\13\0\1\u0286\160\0\4\u0368\2\0\1\u0368\15\0\1\u0368"+ + "\6\0\1\u0369\2\u036a\1\u0369\4\u036a\1\u036b\1\u036a\174\0"+ + "\1\u02d6\4\u036c\2\0\1\u036c\15\0\1\u036c\6\0\12\u036d"+ + "\1\u0321\13\0\1\u0286\157\0\1\u02d6\4\u036c\2\0\1\u036c"+ + "\15\0\1\u036c\6\0\12\u036c\1\u0321\13\0\1\u0286\157\0"+ + "\1\u02d6\4\u036c\2\0\1\u036c\15\0\1\u036c\6\0\2\u036d"+ + "\1\u036c\2\u036d\2\u036c\2\u036d\1\u036c\1\u0321\13\0\1\u0286"+ + "\160\0\4\u036e\2\0\1\u036e\15\0\1\u036e\6\0\12\u036e"+ + "\1\u02d5\13\0\1\u0286\157\0\1\u036f\33\0\12\u0327\174\0"+ + "\1\u036f\33\0\12\u0370\174\0\1\u036f\33\0\1\u0327\1\u0371"+ + "\1\u0370\2\u0327\2\u0370\2\u0327\1\u0370\174\0\1\u02d6\4\u036e"+ + "\2\0\1\u036e\15\0\1\u036e\6\0\12\u036e\1\u02d5\13\0"+ + "\1\u0286\157\0\34\u01ec\12\u0372\1\0\2\u01ec\1\u0238\3\u01ec"+ + "\1\u01ee\1\u014e\1\u014f\1\u0150\2\0\2\u01ec\4\0\1\u01ec"+ + "\151\0\4\u0373\2\0\1\u0373\15\0\1\u0373\6\0\12\u0373"+ + "\1\u02dd\174\0\4\u0374\2\0\1\u0374\15\0\1\u0374\6\0"+ + "\12\u0374\1\u0375\174\0\4\u0376\2\0\1\u0376\15\0\1\u0376"+ + "\6\0\1\u0377\2\u0378\1\u0377\4\u0378\1\u0379\1\u0378\14\0"+ + "\1\u0109\160\0\4\u037a\2\0\1\u037a\15\0\1\u037a\6\0"+ + "\12\u037a\1\u032f\13\0\1\u0109\160\0\4\u0376\2\0\1\u0376"+ + "\15\0\1\u0376\6\0\1\u0377\2\u0378\1\u0377\4\u0378\1\u0379"+ + "\1\u0378\174\0\1\u014a\4\u037a\2\0\1\u037a\15\0\1\u037a"+ + "\6\0\12\u037b\1\u032f\13\0\1\u0109\157\0\1\u014a\4\u037a"+ + "\2\0\1\u037a\15\0\1\u037a\6\0\12\u037a\1\u032f\13\0"+ + "\1\u0109\157\0\1\u014a\4\u037a\2\0\1\u037a\15\0\1\u037a"+ + "\6\0\2\u037b\1\u037a\2\u037b\2\u037a\2\u037b\1\u037a\1\u032f"+ + "\13\0\1\u0109\225\0\1\u028f\13\0\1\u0109\157\0\1\317"+ + "\1\u037c\31\201\1\320\12\201\174\0\1\317\25\201\1\u0151"+ + "\4\201\1\320\12\201\174\0\1\317\32\201\1\320\5\201"+ + "\1\u037d\4\201\174\0\1\317\32\201\1\320\3\201\1\u037c"+ + "\6\201\174\0\1\317\12\201\1\u037e\17\201\1\320\12\201"+ + "\174\0\1\317\25\201\1\u037f\4\201\1\320\12\201\174\0"+ + "\1\317\15\201\1\u0380\14\201\1\320\12\201\174\0\1\317"+ + "\2\201\1\u0335\27\201\1\320\12\201\174\0\1\317\1\201"+ + "\1\u0151\30\201\1\320\12\201\174\0\1\317\11\201\1\u0381"+ + "\20\201\1\320\12\201\174\0\1\317\1\u0382\31\201\1\320"+ + "\12\201\174\0\1\317\1\u0383\31\201\1\320\12\201\174\0"+ + "\1\317\2\201\1\u0384\27\201\1\320\12\201\174\0\1\317"+ + "\32\201\1\320\4\201\1\u0158\5\201\174\0\1\317\1\u0385"+ + "\31\201\1\320\12\201\174\0\1\317\25\201\1\u0386\4\201"+ + "\1\320\12\201\174\0\1\317\32\201\1\320\4\201\1\u037c"+ + "\5\201\174\0\1\317\32\201\1\320\11\201\1\u037c\174\0"+ + "\1\317\32\201\1\320\2\201\1\u037c\7\201\174\0\1\317"+ + "\16\201\1\u0387\13\201\1\320\12\201\174\0\1\317\32\201"+ + "\1\320\3\201\1\u0388\6\201\174\0\1\317\24\201\1\u0389"+ + "\5\201\1\320\12\201\174\0\1\346\32\235\1\117\10\235"+ + "\1\u02b4\1\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\1\u038a\31\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\7\235\1\u038b"+ + "\22\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\1\u038c"+ + "\31\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + 
"\3\114\3\0\1\114\4\0\2\114\147\0\1\346\25\235"+ + "\1\u038d\4\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\117\11\235\1\u038e\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\1\u038f\31\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\12\235\1\u0390\17\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\1\u0391\31\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\10\235\1\u0392\21\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\31\235\1\u0393\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\150\0\4\u0394\2\0\1\u0394\15\0\1\u0394\6\0\12\u0394"+ + "\1\u030f\174\0\4\u0395\2\0\1\u0395\15\0\1\u0395\6\0"+ + "\12\u0395\1\u0396\174\0\4\u0397\2\0\1\u0397\15\0\1\u0397"+ + "\6\0\1\u0398\2\u0399\1\u0398\4\u0399\1\u039a\1\u0399\14\0"+ + "\1\u02c9\160\0\4\u039b\2\0\1\u039b\15\0\1\u039b\6\0"+ + "\12\u039b\1\u0359\13\0\1\u02c9\160\0\4\u0397\2\0\1\u0397"+ + "\15\0\1\u0397\6\0\1\u0398\2\u0399\1\u0398\4\u0399\1\u039a"+ + "\1\u0399\174\0\1\u0316\4\u039b\2\0\1\u039b\15\0\1\u039b"+ + "\6\0\12\u039c\1\u0359\13\0\1\u02c9\157\0\1\u0316\4\u039b"+ + "\2\0\1\u039b\15\0\1\u039b\6\0\12\u039b\1\u0359\13\0"+ + "\1\u02c9\157\0\1\u0316\4\u039b\2\0\1\u039b\15\0\1\u039b"+ + "\6\0\2\u039c\1\u039b\2\u039c\2\u039b\2\u039c\1\u039b\1\u0359"+ + "\13\0\1\u02c9\160\0\4\u039d\2\0\1\u039d\15\0\1\u039d"+ + "\6\0\12\u039d\1\u0315\13\0\1\u02c9\157\0\1\u039e\33\0"+ + "\12\u035f\174\0\1\u039e\33\0\12\u039f\174\0\1\u039e\33\0"+ + "\1\u035f\1\u03a0\1\u039f\2\u035f\2\u039f\2\u035f\1\u039f\174\0"+ + "\1\u0316\4\u039d\2\0\1\u039d\15\0\1\u039d\6\0\12\u039d"+ + "\1\u0315\13\0\1\u02c9\157\0\46\u01ab\1\0\2\u01ab\1\u01e5"+ + "\3\u01ab\1\u01ad\1\0\1\u01e4\3\0\2\u01ab\4\0\1\u01ab"+ + "\234\0\1\u03a1\211\0\12\u03a2\11\0\1\u01e4\230\0\1\u02cf"+ + "\174\0\4\u03a3\2\0\1\u03a3\15\0\1\u03a3\6\0\12\u03a3"+ + "\1\u0367\174\0\4\u03a4\2\0\1\u03a4\15\0\1\u03a4\6\0"+ + "\12\u03a4\1\u03a5\174\0\4\u03a6\2\0\1\u03a6\15\0\1\u03a6"+ + "\6\0\12\u03a6\1\u03a7\13\0\1\u0286\157\0\1\u02d6\4\u03a6"+ + "\2\0\1\u03a6\15\0\1\u03a6\6\0\12\u03a8\1\u03a7\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u03a6\2\0\1\u03a6\15\0\1\u03a6"+ + "\6\0\12\u03a9\1\u03a7\13\0\1\u0286\157\0\1\u02d6\4\u03a6"+ + "\2\0\1\u03a6\15\0\1\u03a6\6\0\1\u03a8\1\u03aa\1\u03a9"+ + "\2\u03a8\2\u03a9\2\u03a8\1\u03a9\1\u03a7\13\0\1\u0286\160\0"+ + "\4\u03ab\2\0\1\u03ab\15\0\1\u03ab\6\0\12\u03ab\1\u0321"+ + "\13\0\1\u0286\157\0\1\u02d6\4\u03ab\2\0\1\u03ab\15\0"+ + "\1\u03ab\6\0\12\u03ab\1\u0321\13\0\1\u0286\225\0\1\u02d5"+ + "\13\0\1\u0286\213\0\1\u03ac\2\u03ad\1\u03ac\4\u03ad\1\u03ae"+ + "\1\u03ad\174\0\1\u036f\241\0\1\u036f\33\0\2\u0370\1\0"+ + "\2\u0370\2\0\2\u0370\175\0\34\u01ec\12\u03af\1\0\2\u01ec"+ + "\1\u0238\3\u01ec\1\u01ee\1\u014e\1\u014f\1\u0150\2\0\2\u01ec"+ + "\4\0\1\u01ec\216\0\1\u02dd\174\0\4\u03b0\2\0\1\u03b0"+ + "\15\0\1\u03b0\6\0\12\u03b0\1\u0375\174\0\4\u03b1\2\0"+ + "\1\u03b1\15\0\1\u03b1\6\0\1\u03b2\2\u03b3\1\u03b2\4\u03b3"+ + "\1\u03b4\1\u03b3\1\u03b5\174\0\4\u03b6\2\0\1\u03b6\15\0"+ + "\1\u03b6\6\0\12\u03b6\1\u03b7\13\0\1\u0109\157\0\1\u014a"+ + "\4\u03b6\2\0\1\u03b6\15\0\1\u03b6\6\0\12\u03b8\1\u03b7"+ + "\13\0\1\u0109\157\0\1\u014a\4\u03b6\2\0\1\u03b6\15\0"+ + "\1\u03b6\6\0\12\u03b9\1\u03b7\13\0\1\u0109\157\0\1\u014a"+ + "\4\u03b6\2\0\1\u03b6\15\0\1\u03b6\6\0\1\u03b8\1\u03ba"+ + 
"\1\u03b9\2\u03b8\2\u03b9\2\u03b8\1\u03b9\1\u03b7\13\0\1\u0109"+ + "\160\0\4\u03bb\2\0\1\u03bb\15\0\1\u03bb\6\0\12\u03bb"+ + "\1\u032f\13\0\1\u0109\157\0\1\u014a\4\u03bb\2\0\1\u03bb"+ + "\15\0\1\u03bb\6\0\12\u03bb\1\u032f\13\0\1\u0109\157\0"+ + "\1\317\2\201\1\u0151\27\201\1\320\12\201\174\0\1\317"+ + "\3\201\1\u03bc\26\201\1\320\12\201\174\0\1\317\32\201"+ + "\1\320\11\201\1\u03bd\174\0\1\317\32\201\1\320\11\201"+ + "\1\u03be\174\0\1\317\32\201\1\320\7\201\1\u03bf\2\201"+ + "\174\0\1\317\32\201\1\320\4\201\1\u03c0\5\201\174\0"+ + "\1\317\26\201\1\u03c1\3\201\1\320\12\201\174\0\1\317"+ + "\30\201\1\u03c2\1\201\1\320\12\201\174\0\1\317\11\201"+ + "\1\u018b\20\201\1\320\12\201\174\0\1\317\12\201\1\u03c3"+ + "\17\201\1\320\12\201\174\0\1\317\17\201\1\u0159\12\201"+ + "\1\320\12\201\174\0\1\317\32\201\1\320\4\201\1\u03c4"+ + "\5\201\174\0\1\317\30\201\1\u03c5\1\201\1\320\12\201"+ + "\174\0\1\317\30\201\1\u03c6\1\201\1\320\12\201\174\0"+ + "\1\346\32\235\1\117\6\235\1\u02ad\3\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\15\235\1\260\14\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\147\0\1\346\32\235\1\117\1\235\1\u03c7\10\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\30\235\1\u03c8\1\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\6\235\1\u03c9\23\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\5\235\1\u03ca\4\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\22\235"+ + "\1\353\7\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\117\5\235\1\u03cb\4\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\32\235\1\117\1\235\1\260\10\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\13\235\1\u03cc\16\235\1\117\12\235\1\0"+ + "\3\114\1\0\2\114\1\115\3\114\3\0\1\114\4\0"+ + "\2\114\215\0\1\u030f\174\0\4\u03cd\2\0\1\u03cd\15\0"+ + "\1\u03cd\6\0\12\u03cd\1\u0396\174\0\4\u03ce\2\0\1\u03ce"+ + "\15\0\1\u03ce\6\0\12\u03ce\1\u03cf\174\0\4\u03d0\2\0"+ + "\1\u03d0\15\0\1\u03d0\6\0\12\u03d0\1\u03d1\13\0\1\u02c9"+ + "\157\0\1\u0316\4\u03d0\2\0\1\u03d0\15\0\1\u03d0\6\0"+ + "\12\u03d2\1\u03d1\13\0\1\u02c9\157\0\1\u0316\4\u03d0\2\0"+ + "\1\u03d0\15\0\1\u03d0\6\0\12\u03d3\1\u03d1\13\0\1\u02c9"+ + "\157\0\1\u0316\4\u03d0\2\0\1\u03d0\15\0\1\u03d0\6\0"+ + "\1\u03d2\1\u03d4\1\u03d3\2\u03d2\2\u03d3\2\u03d2\1\u03d3\1\u03d1"+ + "\13\0\1\u02c9\160\0\4\u03d5\2\0\1\u03d5\15\0\1\u03d5"+ + "\6\0\12\u03d5\1\u0359\13\0\1\u02c9\157\0\1\u0316\4\u03d5"+ + "\2\0\1\u03d5\15\0\1\u03d5\6\0\12\u03d5\1\u0359\13\0"+ + "\1\u02c9\225\0\1\u0315\13\0\1\u02c9\213\0\1\u03d6\2\u03d7"+ + "\1\u03d6\4\u03d7\1\u03d8\1\u03d7\174\0\1\u039e\241\0\1\u039e"+ + "\33\0\2\u039f\1\0\2\u039f\2\0\2\u039f\176\0\1\u03d9"+ + "\1\0\1\u03d9\5\0\1\u03d9\307\0\1\u01e4\163\0\4\u03da"+ + "\2\0\1\u03da\15\0\1\u03da\6\0\12\u03da\1\u0367\174\0"+ + "\4\u03db\2\0\1\u03db\15\0\1\u03db\6\0\12\u03db\1\u03dc"+ + "\174\0\4\u03dd\2\0\1\u03dd\15\0\1\u03dd\6\0\1\u03de"+ + "\2\u03df\1\u03de\4\u03df\1\u03e0\1\u03df\14\0\1\u0286\160\0"+ + "\4\u03e1\2\0\1\u03e1\15\0\1\u03e1\6\0\12\u03e1\1\u03a7"+ + "\13\0\1\u0286\160\0\4\u03dd\2\0\1\u03dd\15\0\1\u03dd"+ + "\6\0\1\u03de\2\u03df\1\u03de\4\u03df\1\u03e0\1\u03df\174\0"+ + "\1\u02d6\4\u03e1\2\0\1\u03e1\15\0\1\u03e1\6\0\12\u03e2"+ + "\1\u03a7\13\0\1\u0286\157\0\1\u02d6\4\u03e1\2\0\1\u03e1"+ + 
"\15\0\1\u03e1\6\0\12\u03e1\1\u03a7\13\0\1\u0286\157\0"+ + "\1\u02d6\4\u03e1\2\0\1\u03e1\15\0\1\u03e1\6\0\2\u03e2"+ + "\1\u03e1\2\u03e2\2\u03e1\2\u03e2\1\u03e1\1\u03a7\13\0\1\u0286"+ + "\225\0\1\u0321\13\0\1\u0286\157\0\1\u03e3\33\0\12\u03ad"+ + "\174\0\1\u03e3\33\0\12\u03e4\174\0\1\u03e3\33\0\1\u03ad"+ + "\1\u03e5\1\u03e4\2\u03ad\2\u03e4\2\u03ad\1\u03e4\174\0\46\u01ec"+ + "\1\0\2\u01ec\1\u0238\3\u01ec\1\u01ee\1\u014e\1\u014f\1\u0150"+ + "\2\0\2\u01ec\4\0\1\u01ec\151\0\4\u03e6\2\0\1\u03e6"+ + "\15\0\1\u03e6\6\0\12\u03e6\1\u0375\174\0\4\u03e7\2\0"+ + "\1\u03e7\15\0\1\u03e7\6\0\12\u03e7\1\u03e8\173\0\1\u014a"+ + "\4\u03e7\2\0\1\u03e7\15\0\1\u03e7\6\0\12\u03e9\1\u03e8"+ + "\173\0\1\u014a\4\u03e7\2\0\1\u03e7\15\0\1\u03e7\6\0"+ + "\12\u03ea\1\u03e8\173\0\1\u014a\4\u03e7\2\0\1\u03e7\15\0"+ + "\1\u03e7\6\0\1\u03e9\1\u03eb\1\u03ea\2\u03e9\2\u03ea\2\u03e9"+ + "\1\u03ea\1\u03e8\174\0\4\u03ec\2\0\1\u03ec\15\0\1\u03ec"+ + "\6\0\12\u03ec\14\0\1\u0109\160\0\4\u03ed\2\0\1\u03ed"+ + "\15\0\1\u03ed\6\0\12\u03ed\1\u03b7\13\0\1\u0109\160\0"+ + "\4\u03ec\2\0\1\u03ec\15\0\1\u03ec\6\0\12\u03ec\174\0"+ + "\1\u014a\4\u03ed\2\0\1\u03ed\15\0\1\u03ed\6\0\12\u03ee"+ + "\1\u03b7\13\0\1\u0109\157\0\1\u014a\4\u03ed\2\0\1\u03ed"+ + "\15\0\1\u03ed\6\0\12\u03ed\1\u03b7\13\0\1\u0109\157\0"+ + "\1\u014a\4\u03ed\2\0\1\u03ed\15\0\1\u03ed\6\0\2\u03ee"+ + "\1\u03ed\2\u03ee\2\u03ed\2\u03ee\1\u03ed\1\u03b7\13\0\1\u0109"+ + "\225\0\1\u032f\13\0\1\u0109\157\0\1\317\32\201\1\320"+ + "\10\201\1\u033c\1\201\174\0\1\317\1\u03ef\31\201\1\320"+ + "\12\201\174\0\1\317\7\201\1\u03f0\22\201\1\320\12\201"+ + "\174\0\1\317\1\u03f1\31\201\1\320\12\201\174\0\1\317"+ + "\25\201\1\u03f2\4\201\1\320\12\201\174\0\1\317\32\201"+ + "\1\320\11\201\1\u03f3\174\0\1\317\1\u03f4\31\201\1\320"+ + "\12\201\174\0\1\317\12\201\1\u03f5\17\201\1\320\12\201"+ + "\174\0\1\317\1\u03f6\31\201\1\320\12\201\174\0\1\317"+ + "\10\201\1\u03f7\21\201\1\320\12\201\174\0\1\317\31\201"+ + "\1\u03f8\1\320\12\201\174\0\1\346\2\235\1\u03f9\27\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\3\235\1\u03fa"+ + "\26\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\32\235"+ + "\1\117\1\235\1\u03fb\10\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\3\235\1\u03fc\26\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\1\u03fd\31\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\26\235\1\u03fe\3\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\150\0\4\u03ff\2\0\1\u03ff\15\0\1\u03ff\6\0\12\u03ff"+ + "\1\u0396\174\0\4\u0400\2\0\1\u0400\15\0\1\u0400\6\0"+ + "\12\u0400\1\u0401\174\0\4\u0402\2\0\1\u0402\15\0\1\u0402"+ + "\6\0\1\u0403\2\u0404\1\u0403\4\u0404\1\u0405\1\u0404\14\0"+ + "\1\u02c9\160\0\4\u0406\2\0\1\u0406\15\0\1\u0406\6\0"+ + "\12\u0406\1\u03d1\13\0\1\u02c9\160\0\4\u0402\2\0\1\u0402"+ + "\15\0\1\u0402\6\0\1\u0403\2\u0404\1\u0403\4\u0404\1\u0405"+ + "\1\u0404\174\0\1\u0316\4\u0406\2\0\1\u0406\15\0\1\u0406"+ + "\6\0\12\u0407\1\u03d1\13\0\1\u02c9\157\0\1\u0316\4\u0406"+ + "\2\0\1\u0406\15\0\1\u0406\6\0\12\u0406\1\u03d1\13\0"+ + "\1\u02c9\157\0\1\u0316\4\u0406\2\0\1\u0406\15\0\1\u0406"+ + "\6\0\2\u0407\1\u0406\2\u0407\2\u0406\2\u0407\1\u0406\1\u03d1"+ + "\13\0\1\u02c9\225\0\1\u0359\13\0\1\u02c9\157\0\1\u0408"+ + "\33\0\12\u03d7\174\0\1\u0408\33\0\12\u0409\174\0\1\u0408"+ + "\33\0\1\u03d7\1\u040a\1\u0409\2\u03d7\2\u0409\2\u03d7\1\u0409"+ + 
"\254\0\1\u0150\227\0\1\u0367\174\0\4\u040b\2\0\1\u040b"+ + "\15\0\1\u040b\6\0\12\u040b\1\u03dc\174\0\4\u040c\2\0"+ + "\1\u040c\15\0\1\u040c\6\0\12\u040c\1\u040d\174\0\4\u040e"+ + "\2\0\1\u040e\15\0\1\u040e\6\0\12\u040e\1\u040f\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u040e\2\0\1\u040e\15\0\1\u040e"+ + "\6\0\12\u0410\1\u040f\13\0\1\u0286\157\0\1\u02d6\4\u040e"+ + "\2\0\1\u040e\15\0\1\u040e\6\0\12\u0411\1\u040f\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u040e\2\0\1\u040e\15\0\1\u040e"+ + "\6\0\1\u0410\1\u0412\1\u0411\2\u0410\2\u0411\2\u0410\1\u0411"+ + "\1\u040f\13\0\1\u0286\160\0\4\u0413\2\0\1\u0413\15\0"+ + "\1\u0413\6\0\12\u0413\1\u03a7\13\0\1\u0286\157\0\1\u02d6"+ + "\4\u0413\2\0\1\u0413\15\0\1\u0413\6\0\12\u0413\1\u03a7"+ + "\13\0\1\u0286\213\0\1\u0414\2\u0415\1\u0414\4\u0415\1\u0416"+ + "\1\u0415\174\0\1\u03e3\241\0\1\u03e3\33\0\2\u03e4\1\0"+ + "\2\u03e4\2\0\2\u03e4\243\0\1\u0375\174\0\4\u0417\2\0"+ + "\1\u0417\15\0\1\u0417\6\0\12\u0417\1\u03e8\174\0\4\u03ec"+ + "\2\0\1\u03ec\15\0\1\u03ec\6\0\12\u03ec\1\u02e4\173\0"+ + "\1\u014a\4\u0417\2\0\1\u0417\15\0\1\u0417\6\0\12\u0418"+ + "\1\u03e8\173\0\1\u014a\4\u0417\2\0\1\u0417\15\0\1\u0417"+ + "\6\0\12\u0417\1\u03e8\173\0\1\u014a\4\u0417\2\0\1\u0417"+ + "\15\0\1\u0417\6\0\2\u0418\1\u0417\2\u0418\2\u0417\2\u0418"+ + "\1\u0417\1\u03e8\174\0\4\u0419\2\0\1\u0419\15\0\1\u0419"+ + "\6\0\12\u0419\14\0\1\u0109\160\0\4\u041a\2\0\1\u041a"+ + "\15\0\1\u041a\6\0\12\u041a\1\u03b7\13\0\1\u0109\157\0"+ + "\1\u014a\4\u041a\2\0\1\u041a\15\0\1\u041a\6\0\12\u041a"+ + "\1\u03b7\13\0\1\u0109\157\0\1\317\32\201\1\320\6\201"+ + "\1\u0335\3\201\174\0\1\317\15\201\1\u0116\14\201\1\320"+ + "\12\201\174\0\1\317\32\201\1\320\1\201\1\u041b\10\201"+ + "\174\0\1\317\30\201\1\u041c\1\201\1\320\12\201\174\0"+ + "\1\317\6\201\1\u041d\23\201\1\320\12\201\174\0\1\317"+ + "\32\201\1\320\5\201\1\u041e\4\201\174\0\1\317\22\201"+ + "\1\u0151\7\201\1\320\12\201\174\0\1\317\32\201\1\320"+ + "\5\201\1\u041f\4\201\174\0\1\317\32\201\1\320\1\201"+ + "\1\u0116\10\201\174\0\1\317\13\201\1\u0420\16\201\1\320"+ + "\12\201\174\0\1\346\32\235\1\117\7\235\1\u0421\2\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\32\235\1\117\10\235\1\260"+ + "\1\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\32\235\1\117\4\235"+ + "\1\u0422\5\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\16\235\1\u0423"+ + "\13\235\1\117\12\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\26\235"+ + "\1\u0424\3\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\147\0\1\346"+ + "\32\235\1\117\7\235\1\u0425\2\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\215\0"+ + "\1\u0396\174\0\4\u0426\2\0\1\u0426\15\0\1\u0426\6\0"+ + "\12\u0426\1\u0401\174\0\4\u0427\2\0\1\u0427\15\0\1\u0427"+ + "\6\0\12\u0427\1\u0428\174\0\4\u0429\2\0\1\u0429\15\0"+ + "\1\u0429\6\0\12\u0429\1\u042a\13\0\1\u02c9\157\0\1\u0316"+ + "\4\u0429\2\0\1\u0429\15\0\1\u0429\6\0\12\u042b\1\u042a"+ + "\13\0\1\u02c9\157\0\1\u0316\4\u0429\2\0\1\u0429\15\0"+ + "\1\u0429\6\0\12\u042c\1\u042a\13\0\1\u02c9\157\0\1\u0316"+ + "\4\u0429\2\0\1\u0429\15\0\1\u0429\6\0\1\u042b\1\u042d"+ + "\1\u042c\2\u042b\2\u042c\2\u042b\1\u042c\1\u042a\13\0\1\u02c9"+ + "\160\0\4\u042e\2\0\1\u042e\15\0\1\u042e\6\0\12\u042e"+ + "\1\u03d1\13\0\1\u02c9\157\0\1\u0316\4\u042e\2\0\1\u042e"+ + "\15\0\1\u042e\6\0\12\u042e\1\u03d1\13\0\1\u02c9\213\0"+ + "\1\u042f\2\u0430\1\u042f\4\u0430\1\u0431\1\u0430\174\0\1\u0408"+ + 
"\241\0\1\u0408\33\0\2\u0409\1\0\2\u0409\2\0\2\u0409"+ + "\176\0\4\u0432\2\0\1\u0432\15\0\1\u0432\6\0\12\u0432"+ + "\1\u03dc\174\0\4\u0433\2\0\1\u0433\15\0\1\u0433\6\0"+ + "\12\u0433\1\u0434\174\0\4\u0435\2\0\1\u0435\15\0\1\u0435"+ + "\6\0\1\u0436\2\u0437\1\u0436\4\u0437\1\u0438\1\u0437\14\0"+ + "\1\u0286\160\0\4\u0439\2\0\1\u0439\15\0\1\u0439\6\0"+ + "\12\u0439\1\u040f\13\0\1\u0286\160\0\4\u0435\2\0\1\u0435"+ + "\15\0\1\u0435\6\0\1\u0436\2\u0437\1\u0436\4\u0437\1\u0438"+ + "\1\u0437\174\0\1\u02d6\4\u0439\2\0\1\u0439\15\0\1\u0439"+ + "\6\0\12\u043a\1\u040f\13\0\1\u0286\157\0\1\u02d6\4\u0439"+ + "\2\0\1\u0439\15\0\1\u0439\6\0\12\u0439\1\u040f\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u0439\2\0\1\u0439\15\0\1\u0439"+ + "\6\0\2\u043a\1\u0439\2\u043a\2\u0439\2\u043a\1\u0439\1\u040f"+ + "\13\0\1\u0286\225\0\1\u03a7\13\0\1\u0286\213\0\12\u0415"+ + "\14\0\1\u0286\213\0\12\u043b\14\0\1\u0286\213\0\1\u0415"+ + "\1\u043c\1\u043b\2\u0415\2\u043b\2\u0415\1\u043b\14\0\1\u0286"+ + "\160\0\4\u043d\2\0\1\u043d\15\0\1\u043d\6\0\12\u043d"+ + "\1\u03e8\173\0\1\u014a\4\u043d\2\0\1\u043d\15\0\1\u043d"+ + "\6\0\12\u043d\1\u03e8\174\0\4\u043e\2\0\1\u043e\15\0"+ + "\1\u043e\6\0\12\u043e\14\0\1\u0109\225\0\1\u03b7\13\0"+ + "\1\u0109\157\0\1\317\2\201\1\u043f\27\201\1\320\12\201"+ + "\174\0\1\317\3\201\1\u0440\26\201\1\320\12\201\174\0"+ + "\1\317\32\201\1\320\1\201\1\u0441\10\201\174\0\1\317"+ + "\3\201\1\u0442\26\201\1\320\12\201\174\0\1\317\1\u0443"+ + "\31\201\1\320\12\201\174\0\1\317\26\201\1\u0444\3\201"+ + "\1\320\12\201\174\0\1\346\1\u0445\31\235\1\117\12\235"+ + "\1\0\3\114\1\0\2\114\1\115\3\114\3\0\1\114"+ + "\4\0\2\114\147\0\1\346\24\235\1\u0446\5\235\1\117"+ + "\12\235\1\0\3\114\1\0\2\114\1\115\3\114\3\0"+ + "\1\114\4\0\2\114\147\0\1\346\1\235\1\u0447\30\235"+ + "\1\117\12\235\1\0\3\114\1\0\2\114\1\115\3\114"+ + "\3\0\1\114\4\0\2\114\147\0\1\346\32\235\1\117"+ + "\2\235\1\362\7\235\1\0\3\114\1\0\2\114\1\115"+ + "\3\114\3\0\1\114\4\0\2\114\147\0\1\346\6\235"+ + "\1\353\23\235\1\117\12\235\1\0\3\114\1\0\2\114"+ + "\1\115\3\114\3\0\1\114\4\0\2\114\150\0\4\u0448"+ + "\2\0\1\u0448\15\0\1\u0448\6\0\12\u0448\1\u0401\174\0"+ + "\4\u0449\2\0\1\u0449\15\0\1\u0449\6\0\12\u0449\1\u044a"+ + "\174\0\4\u044b\2\0\1\u044b\15\0\1\u044b\6\0\1\u044c"+ + "\2\u044d\1\u044c\4\u044d\1\u044e\1\u044d\14\0\1\u02c9\160\0"+ + "\4\u044f\2\0\1\u044f\15\0\1\u044f\6\0\12\u044f\1\u042a"+ + "\13\0\1\u02c9\160\0\4\u044b\2\0\1\u044b\15\0\1\u044b"+ + "\6\0\1\u044c\2\u044d\1\u044c\4\u044d\1\u044e\1\u044d\174\0"+ + "\1\u0316\4\u044f\2\0\1\u044f\15\0\1\u044f\6\0\12\u0450"+ + "\1\u042a\13\0\1\u02c9\157\0\1\u0316\4\u044f\2\0\1\u044f"+ + "\15\0\1\u044f\6\0\12\u044f\1\u042a\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u044f\2\0\1\u044f\15\0\1\u044f\6\0\2\u0450"+ + "\1\u044f\2\u0450\2\u044f\2\u0450\1\u044f\1\u042a\13\0\1\u02c9"+ + "\225\0\1\u03d1\13\0\1\u02c9\213\0\12\u0430\14\0\1\u02c9"+ + "\213\0\12\u0451\14\0\1\u02c9\213\0\1\u0430\1\u0452\1\u0451"+ + "\2\u0430\2\u0451\2\u0430\1\u0451\14\0\1\u02c9\225\0\1\u03dc"+ + "\174\0\4\u0453\2\0\1\u0453\15\0\1\u0453\6\0\12\u0453"+ + "\1\u0434\174\0\4\u0454\2\0\1\u0454\15\0\1\u0454\6\0"+ + "\12\u0454\1\u0455\174\0\4\u0456\2\0\1\u0456\15\0\1\u0456"+ + "\6\0\12\u0456\1\u0457\13\0\1\u0286\157\0\1\u02d6\4\u0456"+ + "\2\0\1\u0456\15\0\1\u0456\6\0\12\u0458\1\u0457\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u0456\2\0\1\u0456\15\0\1\u0456"+ + "\6\0\12\u0459\1\u0457\13\0\1\u0286\157\0\1\u02d6\4\u0456"+ + "\2\0\1\u0456\15\0\1\u0456\6\0\1\u0458\1\u045a\1\u0459"+ + "\2\u0458\2\u0459\2\u0458\1\u0459\1\u0457\13\0\1\u0286\160\0"+ + 
"\4\u045b\2\0\1\u045b\15\0\1\u045b\6\0\12\u045b\1\u040f"+ + "\13\0\1\u0286\157\0\1\u02d6\4\u045b\2\0\1\u045b\15\0"+ + "\1\u045b\6\0\12\u045b\1\u040f\13\0\1\u0286\241\0\1\u0286"+ + "\213\0\2\u043b\1\0\2\u043b\2\0\2\u043b\15\0\1\u0286"+ + "\225\0\1\u03e8\174\0\4\u02e4\2\0\1\u02e4\15\0\1\u02e4"+ + "\6\0\12\u02e4\14\0\1\u0109\157\0\1\317\32\201\1\320"+ + "\7\201\1\u045c\2\201\174\0\1\317\32\201\1\320\10\201"+ + "\1\u0116\1\201\174\0\1\317\32\201\1\320\4\201\1\u045d"+ + "\5\201\174\0\1\317\16\201\1\u045e\13\201\1\320\12\201"+ + "\174\0\1\317\26\201\1\u045f\3\201\1\320\12\201\174\0"+ + "\1\317\32\201\1\320\7\201\1\u0460\2\201\174\0\1\346"+ + "\4\235\1\353\25\235\1\117\12\235\1\0\3\114\1\0"+ + "\2\114\1\115\3\114\3\0\1\114\4\0\2\114\147\0"+ + "\1\346\24\235\1\260\5\235\1\117\12\235\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\147\0\1\346\32\235\1\117\11\235\1\260\1\0\3\114"+ + "\1\0\2\114\1\115\3\114\3\0\1\114\4\0\2\114"+ + "\215\0\1\u0401\174\0\4\u0461\2\0\1\u0461\15\0\1\u0461"+ + "\6\0\12\u0461\1\u044a\174\0\4\u0462\2\0\1\u0462\15\0"+ + "\1\u0462\6\0\12\u0462\1\u0463\174\0\4\u0464\2\0\1\u0464"+ + "\15\0\1\u0464\6\0\12\u0464\1\u0465\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u0464\2\0\1\u0464\15\0\1\u0464\6\0\12\u0466"+ + "\1\u0465\13\0\1\u02c9\157\0\1\u0316\4\u0464\2\0\1\u0464"+ + "\15\0\1\u0464\6\0\12\u0467\1\u0465\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u0464\2\0\1\u0464\15\0\1\u0464\6\0\1\u0466"+ + "\1\u0468\1\u0467\2\u0466\2\u0467\2\u0466\1\u0467\1\u0465\13\0"+ + "\1\u02c9\160\0\4\u0469\2\0\1\u0469\15\0\1\u0469\6\0"+ + "\12\u0469\1\u042a\13\0\1\u02c9\157\0\1\u0316\4\u0469\2\0"+ + "\1\u0469\15\0\1\u0469\6\0\12\u0469\1\u042a\13\0\1\u02c9"+ + "\241\0\1\u02c9\213\0\2\u0451\1\0\2\u0451\2\0\2\u0451"+ + "\15\0\1\u02c9\160\0\4\u046a\2\0\1\u046a\15\0\1\u046a"+ + "\6\0\12\u046a\1\u0434\174\0\4\u046b\2\0\1\u046b\15\0"+ + "\1\u046b\6\0\12\u046b\1\u046c\174\0\4\u046d\2\0\1\u046d"+ + "\15\0\1\u046d\6\0\1\u046e\2\u046f\1\u046e\4\u046f\1\u0470"+ + "\1\u046f\14\0\1\u0286\160\0\4\u0471\2\0\1\u0471\15\0"+ + "\1\u0471\6\0\12\u0471\1\u0457\13\0\1\u0286\160\0\4\u046d"+ + "\2\0\1\u046d\15\0\1\u046d\6\0\1\u046e\2\u046f\1\u046e"+ + "\4\u046f\1\u0470\1\u046f\174\0\1\u02d6\4\u0471\2\0\1\u0471"+ + "\15\0\1\u0471\6\0\12\u0472\1\u0457\13\0\1\u0286\157\0"+ + "\1\u02d6\4\u0471\2\0\1\u0471\15\0\1\u0471\6\0\12\u0471"+ + "\1\u0457\13\0\1\u0286\157\0\1\u02d6\4\u0471\2\0\1\u0471"+ + "\15\0\1\u0471\6\0\2\u0472\1\u0471\2\u0472\2\u0471\2\u0472"+ + "\1\u0471\1\u0457\13\0\1\u0286\225\0\1\u040f\13\0\1\u0286"+ + "\157\0\1\317\1\u0473\31\201\1\320\12\201\174\0\1\317"+ + "\24\201\1\u0474\5\201\1\320\12\201\174\0\1\317\1\201"+ + "\1\u0475\30\201\1\320\12\201\174\0\1\317\32\201\1\320"+ + "\2\201\1\u0158\7\201\174\0\1\317\6\201\1\u0151\23\201"+ + "\1\320\12\201\175\0\4\u0476\2\0\1\u0476\15\0\1\u0476"+ + "\6\0\12\u0476\1\u044a\174\0\4\u0477\2\0\1\u0477\15\0"+ + "\1\u0477\6\0\12\u0477\1\u0478\174\0\4\u0479\2\0\1\u0479"+ + "\15\0\1\u0479\6\0\1\u047a\2\u047b\1\u047a\4\u047b\1\u047c"+ + "\1\u047b\14\0\1\u02c9\160\0\4\u047d\2\0\1\u047d\15\0"+ + "\1\u047d\6\0\12\u047d\1\u0465\13\0\1\u02c9\160\0\4\u0479"+ + "\2\0\1\u0479\15\0\1\u0479\6\0\1\u047a\2\u047b\1\u047a"+ + "\4\u047b\1\u047c\1\u047b\174\0\1\u0316\4\u047d\2\0\1\u047d"+ + "\15\0\1\u047d\6\0\12\u047e\1\u0465\13\0\1\u02c9\157\0"+ + "\1\u0316\4\u047d\2\0\1\u047d\15\0\1\u047d\6\0\12\u047d"+ + "\1\u0465\13\0\1\u02c9\157\0\1\u0316\4\u047d\2\0\1\u047d"+ + "\15\0\1\u047d\6\0\2\u047e\1\u047d\2\u047e\2\u047d\2\u047e"+ + "\1\u047d\1\u0465\13\0\1\u02c9\225\0\1\u042a\13\0\1\u02c9"+ + 
"\225\0\1\u0434\174\0\4\u047f\2\0\1\u047f\15\0\1\u047f"+ + "\6\0\12\u047f\1\u046c\174\0\4\u0480\2\0\1\u0480\15\0"+ + "\1\u0480\6\0\1\u0481\2\u0482\1\u0481\4\u0482\1\u0483\1\u0482"+ + "\1\u0484\174\0\4\u0485\2\0\1\u0485\15\0\1\u0485\6\0"+ + "\12\u0485\1\u0486\13\0\1\u0286\157\0\1\u02d6\4\u0485\2\0"+ + "\1\u0485\15\0\1\u0485\6\0\12\u0487\1\u0486\13\0\1\u0286"+ + "\157\0\1\u02d6\4\u0485\2\0\1\u0485\15\0\1\u0485\6\0"+ + "\12\u0488\1\u0486\13\0\1\u0286\157\0\1\u02d6\4\u0485\2\0"+ + "\1\u0485\15\0\1\u0485\6\0\1\u0487\1\u0489\1\u0488\2\u0487"+ + "\2\u0488\2\u0487\1\u0488\1\u0486\13\0\1\u0286\160\0\4\u048a"+ + "\2\0\1\u048a\15\0\1\u048a\6\0\12\u048a\1\u0457\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u048a\2\0\1\u048a\15\0\1\u048a"+ + "\6\0\12\u048a\1\u0457\13\0\1\u0286\157\0\1\317\4\201"+ + "\1\u0151\25\201\1\320\12\201\174\0\1\317\24\201\1\u0116"+ + "\5\201\1\320\12\201\174\0\1\317\32\201\1\320\11\201"+ + "\1\u0116\242\0\1\u044a\174\0\4\u048b\2\0\1\u048b\15\0"+ + "\1\u048b\6\0\12\u048b\1\u0478\174\0\4\u048c\2\0\1\u048c"+ + "\15\0\1\u048c\6\0\1\u048d\2\u048e\1\u048d\4\u048e\1\u048f"+ + "\1\u048e\1\u0490\174\0\4\u0491\2\0\1\u0491\15\0\1\u0491"+ + "\6\0\12\u0491\1\u0492\13\0\1\u02c9\157\0\1\u0316\4\u0491"+ + "\2\0\1\u0491\15\0\1\u0491\6\0\12\u0493\1\u0492\13\0"+ + "\1\u02c9\157\0\1\u0316\4\u0491\2\0\1\u0491\15\0\1\u0491"+ + "\6\0\12\u0494\1\u0492\13\0\1\u02c9\157\0\1\u0316\4\u0491"+ + "\2\0\1\u0491\15\0\1\u0491\6\0\1\u0493\1\u0495\1\u0494"+ + "\2\u0493\2\u0494\2\u0493\1\u0494\1\u0492\13\0\1\u02c9\160\0"+ + "\4\u0496\2\0\1\u0496\15\0\1\u0496\6\0\12\u0496\1\u0465"+ + "\13\0\1\u02c9\157\0\1\u0316\4\u0496\2\0\1\u0496\15\0"+ + "\1\u0496\6\0\12\u0496\1\u0465\13\0\1\u02c9\160\0\4\u0497"+ + "\2\0\1\u0497\15\0\1\u0497\6\0\12\u0497\1\u046c\174\0"+ + "\4\u0498\2\0\1\u0498\15\0\1\u0498\6\0\12\u0498\1\u0499"+ + "\173\0\1\u02d6\4\u0498\2\0\1\u0498\15\0\1\u0498\6\0"+ + "\12\u049a\1\u0499\173\0\1\u02d6\4\u0498\2\0\1\u0498\15\0"+ + "\1\u0498\6\0\12\u049b\1\u0499\173\0\1\u02d6\4\u0498\2\0"+ + "\1\u0498\15\0\1\u0498\6\0\1\u049a\1\u049c\1\u049b\2\u049a"+ + "\2\u049b\2\u049a\1\u049b\1\u0499\174\0\4\u049d\2\0\1\u049d"+ + "\15\0\1\u049d\6\0\12\u049d\14\0\1\u0286\160\0\4\u049e"+ + "\2\0\1\u049e\15\0\1\u049e\6\0\12\u049e\1\u0486\13\0"+ + "\1\u0286\160\0\4\u049d\2\0\1\u049d\15\0\1\u049d\6\0"+ + "\12\u049d\174\0\1\u02d6\4\u049e\2\0\1\u049e\15\0\1\u049e"+ + "\6\0\12\u049f\1\u0486\13\0\1\u0286\157\0\1\u02d6\4\u049e"+ + "\2\0\1\u049e\15\0\1\u049e\6\0\12\u049e\1\u0486\13\0"+ + "\1\u0286\157\0\1\u02d6\4\u049e\2\0\1\u049e\15\0\1\u049e"+ + "\6\0\2\u049f\1\u049e\2\u049f\2\u049e\2\u049f\1\u049e\1\u0486"+ + "\13\0\1\u0286\225\0\1\u0457\13\0\1\u0286\160\0\4\u04a0"+ + "\2\0\1\u04a0\15\0\1\u04a0\6\0\12\u04a0\1\u0478\174\0"+ + "\4\u04a1\2\0\1\u04a1\15\0\1\u04a1\6\0\12\u04a1\1\u04a2"+ + "\173\0\1\u0316\4\u04a1\2\0\1\u04a1\15\0\1\u04a1\6\0"+ + "\12\u04a3\1\u04a2\173\0\1\u0316\4\u04a1\2\0\1\u04a1\15\0"+ + "\1\u04a1\6\0\12\u04a4\1\u04a2\173\0\1\u0316\4\u04a1\2\0"+ + "\1\u04a1\15\0\1\u04a1\6\0\1\u04a3\1\u04a5\1\u04a4\2\u04a3"+ + "\2\u04a4\2\u04a3\1\u04a4\1\u04a2\174\0\4\u04a6\2\0\1\u04a6"+ + "\15\0\1\u04a6\6\0\12\u04a6\14\0\1\u02c9\160\0\4\u04a7"+ + "\2\0\1\u04a7\15\0\1\u04a7\6\0\12\u04a7\1\u0492\13\0"+ + "\1\u02c9\160\0\4\u04a6\2\0\1\u04a6\15\0\1\u04a6\6\0"+ + "\12\u04a6\174\0\1\u0316\4\u04a7\2\0\1\u04a7\15\0\1\u04a7"+ + "\6\0\12\u04a8\1\u0492\13\0\1\u02c9\157\0\1\u0316\4\u04a7"+ + "\2\0\1\u04a7\15\0\1\u04a7\6\0\12\u04a7\1\u0492\13\0"+ + "\1\u02c9\157\0\1\u0316\4\u04a7\2\0\1\u04a7\15\0\1\u04a7"+ + 
"\6\0\2\u04a8\1\u04a7\2\u04a8\2\u04a7\2\u04a8\1\u04a7\1\u0492"+ + "\13\0\1\u02c9\225\0\1\u0465\13\0\1\u02c9\225\0\1\u046c"+ + "\174\0\4\u04a9\2\0\1\u04a9\15\0\1\u04a9\6\0\12\u04a9"+ + "\1\u0499\174\0\4\u049d\2\0\1\u049d\15\0\1\u049d\6\0"+ + "\12\u049d\1\u043b\173\0\1\u02d6\4\u04a9\2\0\1\u04a9\15\0"+ + "\1\u04a9\6\0\12\u04aa\1\u0499\173\0\1\u02d6\4\u04a9\2\0"+ + "\1\u04a9\15\0\1\u04a9\6\0\12\u04a9\1\u0499\173\0\1\u02d6"+ + "\4\u04a9\2\0\1\u04a9\15\0\1\u04a9\6\0\2\u04aa\1\u04a9"+ + "\2\u04aa\2\u04a9\2\u04aa\1\u04a9\1\u0499\174\0\4\u04ab\2\0"+ + "\1\u04ab\15\0\1\u04ab\6\0\12\u04ab\14\0\1\u0286\160\0"+ + "\4\u04ac\2\0\1\u04ac\15\0\1\u04ac\6\0\12\u04ac\1\u0486"+ + "\13\0\1\u0286\157\0\1\u02d6\4\u04ac\2\0\1\u04ac\15\0"+ + "\1\u04ac\6\0\12\u04ac\1\u0486\13\0\1\u0286\225\0\1\u0478"+ + "\174\0\4\u04ad\2\0\1\u04ad\15\0\1\u04ad\6\0\12\u04ad"+ + "\1\u04a2\174\0\4\u04a6\2\0\1\u04a6\15\0\1\u04a6\6\0"+ + "\12\u04a6\1\u0451\173\0\1\u0316\4\u04ad\2\0\1\u04ad\15\0"+ + "\1\u04ad\6\0\12\u04ae\1\u04a2\173\0\1\u0316\4\u04ad\2\0"+ + "\1\u04ad\15\0\1\u04ad\6\0\12\u04ad\1\u04a2\173\0\1\u0316"+ + "\4\u04ad\2\0\1\u04ad\15\0\1\u04ad\6\0\2\u04ae\1\u04ad"+ + "\2\u04ae\2\u04ad\2\u04ae\1\u04ad\1\u04a2\174\0\4\u04af\2\0"+ + "\1\u04af\15\0\1\u04af\6\0\12\u04af\14\0\1\u02c9\160\0"+ + "\4\u04b0\2\0\1\u04b0\15\0\1\u04b0\6\0\12\u04b0\1\u0492"+ + "\13\0\1\u02c9\157\0\1\u0316\4\u04b0\2\0\1\u04b0\15\0"+ + "\1\u04b0\6\0\12\u04b0\1\u0492\13\0\1\u02c9\160\0\4\u04b1"+ + "\2\0\1\u04b1\15\0\1\u04b1\6\0\12\u04b1\1\u0499\173\0"+ + "\1\u02d6\4\u04b1\2\0\1\u04b1\15\0\1\u04b1\6\0\12\u04b1"+ + "\1\u0499\174\0\4\u04b2\2\0\1\u04b2\15\0\1\u04b2\6\0"+ + "\12\u04b2\14\0\1\u0286\225\0\1\u0486\13\0\1\u0286\160\0"+ + "\4\u04b3\2\0\1\u04b3\15\0\1\u04b3\6\0\12\u04b3\1\u04a2"+ + "\173\0\1\u0316\4\u04b3\2\0\1\u04b3\15\0\1\u04b3\6\0"+ + "\12\u04b3\1\u04a2\174\0\4\u04b4\2\0\1\u04b4\15\0\1\u04b4"+ + "\6\0\12\u04b4\14\0\1\u02c9\225\0\1\u0492\13\0\1\u02c9"+ + "\225\0\1\u0499\174\0\4\u043b\2\0\1\u043b\15\0\1\u043b"+ + "\6\0\12\u043b\14\0\1\u0286\225\0\1\u04a2\174\0\4\u0451"+ + "\2\0\1\u0451\15\0\1\u0451\6\0\12\u0451\14\0\1\u02c9"+ + "\11\0"; private static int [] zzUnpackTrans() { - int [] result = new int[80884]; + int [] result = new int[192294]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -2298,23 +2953,23 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\17\1\2\11\1\1\1\0\1\1\1\0"+ - "\1\1\6\0\2\1\1\0\3\1\1\0\1\1\1\0"+ - "\4\1\11\0\32\1\3\0\4\1\32\0\4\1\10\0"+ - "\1\11\1\0\23\1\2\0\1\1\1\0\7\1\3\0"+ - "\2\1\1\0\4\1\1\0\2\1\1\0\2\1\10\0"+ - "\1\1\32\0\1\1\1\0\11\1\1\0\1\1\2\0"+ - "\1\1\1\0\1\1\10\0\3\1\15\0\11\1\3\0"+ - "\2\1\1\0\4\1\1\0\4\1\1\0\2\1\1\0"+ - "\2\1\1\0\3\1\7\0\2\1\20\0\1\1\10\0"+ - "\1\1\3\0\1\1\32\0\3\1\23\0\1\1\27\0"+ - "\1\1\4\0\1\1\6\0\1\1\4\0\2\1\36\0"+ - "\1\1\51\0\1\1\42\0\1\1\50\0\1\1\122\0"+ - "\1\1\116\0\1\1\107\0\1\1\74\0\1\1\51\0"+ - "\1\1\333\0"; + "\1\0\1\11\27\1\2\11\12\1\15\0\1\1\1\0"+ + "\1\1\10\0\1\1\21\0\2\1\1\0\3\1\1\0"+ + "\1\1\1\0\4\1\46\0\32\1\3\0\4\1\32\0"+ + "\4\1\17\0\1\11\1\0\23\1\2\0\1\1\1\0"+ + "\7\1\3\0\2\1\1\0\4\1\1\0\2\1\1\0"+ + "\2\1\10\0\1\1\32\0\1\1\1\0\11\1\1\0"+ + "\1\1\2\0\1\1\1\0\1\1\10\0\3\1\15\0"+ + "\11\1\3\0\2\1\1\0\4\1\1\0\4\1\1\0"+ + "\2\1\1\0\2\1\1\0\3\1\7\0\2\1\20\0"+ + "\1\1\10\0\1\1\3\0\1\1\32\0\3\1\23\0"+ + "\1\1\27\0\1\1\4\0\1\1\6\0\1\1\4\0"+ + "\2\1\36\0\1\1\51\0\1\1\42\0\1\1\51\0"+ + 
"\1\1\122\0\1\1\117\0\1\1\107\0\1\1\74\0"+ + "\1\1\51\0\1\1\333\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[1117]; + int [] result = new int[1204]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -2540,7 +3195,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 2336) { + while (i < 2802) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -2819,36 +3474,36 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { - case 8: - { if (populateAttributes(URL_TYPE)) return true; + case 7: + { if (populateAttributes(EMAIL_TYPE)) return true; } case 9: break; - case 5: - { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; - } - case 10: break; - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } - case 11: break; - case 3: - { if (populateAttributes(NUMERIC_TYPE)) return true; - } - case 12: break; - case 6: - { if (populateAttributes(HIRAGANA_TYPE)) return true; - } - case 13: break; - case 4: - { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; - } - case 14: break; case 2: { if (populateAttributes(WORD_TYPE)) return true; } + case 10: break; + case 6: + { if (populateAttributes(HIRAGANA_TYPE)) return true; + } + case 11: break; + case 5: + { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; + } + case 12: break; + case 4: + { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; + } + case 13: break; + case 3: + { if (populateAttributes(NUMERIC_TYPE)) return true; + } + case 14: break; + case 1: + { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } case 15: break; - case 7: - { if (populateAttributes(EMAIL_TYPE)) return true; + case 8: + { if (populateAttributes(URL_TYPE)) return true; } case 16: break; default: diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex index fefd9c717cf..7d9dc405c37 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex @@ -45,14 +45,6 @@ import org.apache.lucene.util.AttributeSource; *
 * <IDEOGRAPHIC>: A single CJKV ideographic character
 *
 * <HIRAGANA>: A single hiragana character
  • * - * WARNING: Because JFlex does not support Unicode supplementary - * characters (characters above the Basic Multilingual Plane, which contains - * those up to and including U+FFFF), this scanner will not recognize them - * properly. If you need to be able to process text containing supplementary - * characters, consider using the ICU4J-backed implementation in modules/analysis/icu - * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer) - * instead of this class, since the ICU4J-backed implementation does not have - * this limitation. */ %% @@ -70,15 +62,30 @@ import org.apache.lucene.util.AttributeSource; super(in); %init} + +%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro +ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) +Format = ([\p{WB:Format}] | {FormatSupp}) +Numeric = ([\p{WB:Numeric}] | {NumericSupp}) +Extend = ([\p{WB:Extend}] | {ExtendSupp}) +Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) +MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) +MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) +MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) +ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) +ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) +Han = ([\p{Script:Han}] | {HanSupp}) +Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) + // UAX#29 WB4. X (Extend | Format)* --> X // -ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]* +ALetterEx = {ALetter} ({Format} | {Extend})* // TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it -NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]* -KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]* -MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* -ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]* +NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* +KatakanaEx = {Katakana} ({Format} | {Extend})* +MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* +MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* +ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* // URL and E-mail syntax specifications: @@ -348,12 +355,12 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // // http://www.unicode.org/reports/tr14/#SA // -\p{LB:Complex_Context}+ { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; } +{ComplexContext}+ { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; } // UAX#29 WB14. Any ÷ Any // -\p{Script:Han} { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; } -\p{Script:Hiragana} { if (populateAttributes(HIRAGANA_TYPE)) return true; } +{Han} { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; } +{Hiragana} { if (populateAttributes(HIRAGANA_TYPE)) return true; } // UAX#29 WB3. 
CR × LF diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java index 7b6a5ca45ca..7a80c4312f0 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java @@ -46,6 +46,9 @@ public final class SynonymFilter extends TokenFilter { public SynonymFilter(TokenStream in, SynonymMap map) { super(in); + if (map == null) + throw new IllegalArgumentException("map is required"); + this.map = map; // just ensuring these attributes exist... addAttribute(CharTermAttribute.class); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java index ea8ba38c996..a74b3f8e9e7 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java @@ -78,6 +78,7 @@ public class SynonymMap { } + @Override public String toString() { StringBuilder sb = new StringBuilder("<"); if (synonyms!=null) { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java new file mode 100644 index 00000000000..aa5d41fdc7c --- /dev/null +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java @@ -0,0 +1,96 @@ +package org.apache.lucene.analysis.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.queryParser.QueryParser; // for javadoc + +/** + * Abstract base class for TokenFilters that may remove tokens. + * You have to implement {@link #accept} and return a boolean if the current + * token should be preserved. {@link #incrementToken} uses this method + * to decide if a token should be passed to the caller. + */ +public abstract class FilteringTokenFilter extends TokenFilter { + + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value! 
+ + public FilteringTokenFilter(boolean enablePositionIncrements, TokenStream input){ + super(input); + this.enablePositionIncrements = enablePositionIncrements; + } + + /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */ + protected abstract boolean accept() throws IOException; + + @Override + public final boolean incrementToken() throws IOException { + if (enablePositionIncrements) { + int skippedPositions = 0; + while (input.incrementToken()) { + if (accept()) { + if (skippedPositions != 0) { + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); + } + return true; + } + skippedPositions += posIncrAtt.getPositionIncrement(); + } + } else { + while (input.incrementToken()) { + if (accept()) { + return true; + } + } + } + // reached EOS -- return false + return false; + } + + /** + * @see #setEnablePositionIncrements(boolean) + */ + public boolean getEnablePositionIncrements() { + return enablePositionIncrements; + } + + /** + * If true, this TokenFilter will preserve + * positions of the incoming tokens (ie, accumulate and + * set position increments of the removed tokens). + * Generally, true is best as it does not + * lose information (positions of the original tokens) + * during indexing. + * + *

    When set, when a token is stopped + * (omitted), the position increment of the following + * token is incremented. + * + *

    NOTE: be sure to also + * set {@link QueryParser#setEnablePositionIncrements} if + * you use QueryParser to create queries. + */ + public void setEnablePositionIncrements(boolean enable) { + this.enablePositionIncrements = enable; + } +} diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java index bb5e04fbe7e..7aceadfae78 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java @@ -56,6 +56,25 @@ public class StemmerUtil { return true; } + /** + * Returns true if the character array ends with the suffix. + * + * @param s Input Buffer + * @param len length of input buffer + * @param suffix Suffix string to test + * @return true if s ends with suffix + */ + public static boolean endsWith(char s[], int len, char suffix[]) { + final int suffixLen = suffix.length; + if (suffixLen > len) + return false; + for (int i = suffixLen - 1; i >= 0; i--) + if (s[len -(suffixLen - i)] != suffix[i]) + return false; + + return true; + } + /** * Delete a character in-place * diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp new file mode 100644 index 00000000000..33fe8fb1109 --- /dev/null +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp @@ -0,0 +1,647 @@ +# Steps file for the RSLP stemmer. + +# Step 1: Plural Reduction +{ "Plural", 3, 1, {"s"}, + # bons -> bon + {"ns",1,"n",{"luns","furatapóns","furatapons"}}, + # xamós -> xamón + {"ós",3,"ón"}, + # balões -> balón + {"ões",3,"ón"}, + # capitães -> capitão + {"ães",1,"ão",{"mães","magalhães"}}, + # normais -> normal + {"ais",2,"al",{"cais","tais","mais","pais","ademais"}}, + {"áis",2,"al",{"cáis","táis", "máis", "páis", "ademáis"}}, + # papéis -> papel + {"éis",2,"el"}, + # posíbeis -> posíbel + {"eis",2,"el"}, + # espanhóis -> espanhol + {"óis",2,"ol",{"escornabóis"}}, + # caracois -> caracol + {"ois",2,"ol",{"escornabois"}}, + # cadrís -> cadril + {"ís",2,"il",{"país"}}, + # cadris -> cadril + {"is",2,"il",{"menfis","pais","kinguis"}}, + # males -> mal + {"les",2,"l",{"ingles","marselles","montreales","senegales","manizales","móstoles","nápoles"}}, + # mares -> mar + {"res",3,"r",{"petres","henares","cáceres","baleares","linares","londres","mieres","miraflores","mércores","venres", "pires"}}, + # luces -> luz + {"ces",2,"z"}, + # luzes -> luz + {"zes",2,"z"}, + # leises -> lei + {"ises",3,"z"}, + # animás -> animal + {"ás",1,"al",{"más"}}, + # gases -> gas + {"ses",2,"s"}, + # casas -> casa + {"s",2,"",{"barbadés","barcelonés","cantonés","gabonés","llanés","medinés","escocés","escocês","francês","barcelonês","cantonês","macramés","reves","barcelones","cantones","gabones","llanes","magallanes","medines","escoces","frances","xoves","martes","aliás","pires","lápis","cais","mais","mas","menos","férias","pêsames","crúcis","país","cangas","atenas","asturias","canarias","filipinas","honduras","molucas","caldas","mascareñas","micenas","covarrubias","psoas","óculos","nupcias","xoves","martes","llanes"}}}; + +{ "Unification", 0, 0, {}, + # cansadísimo -> cansadísimo + {"íssimo",5,"ísimo"}, + # cansadísima -> cansadísima + {"íssima",5,"ísima"}, + # homaço -> homazo + {"aço",4,"azo"}, + # mulheraça -> mulheraza + {"aça",4,"aza"}, + # xentuça -> xentuza + 
{"uça",4,"uza"}, + # manilhar -> manillar + {"lhar",2,"llar"}, + # colher -> coller + {"lher",2,"ller"}, + # melhor -> mellor + {"lhor",2,"llor"}, + # alho -> allo + {"lho",1,"llo"}, + # linhar -> liñar + {"nhar",2,"ñar"}, + # penhor -> peñor + {"nhor",2,"ñor"}, + # anho -> año + {"nho",1,"ño"}, + # cunha -> cuña + {"nha",1,"ña"}, + # hospitalário -> hospitalario + {"ário",3,"ario"}, + # bibliotecária -> bibliotecaria + {"ária",3,"aria"}, + # agradable -> agradábel + {"able",2,"ábel"}, + # agradávele -> agradábel + {"ável",2,"ábel"}, + # imposible -> imposíbel + {"ible",2,"íbel"}, + # imposível -> imposíbel + {"ível",2,"íbel"}, + # imposiçom -> imposición + {"çom",2,"ción"}, + # garagem -> garaxe + {"agem",2,"axe"}, + # garage -> garaxe + {"age",2,"axe"}, + # impressão -> impressón + {"ão",3,"ón"}, + # irmao -> irmán + {"ao",1,"án"}, + # irmau -> irmán + {"au",1,"án"}, + # garrafom -> garrafón + {"om",3,"ón"}, + # cantem -> canten + {"m",2,"n"}}; + +{ "Adverb", 0, 0, {}, + # felizmente -> feliz + {"mente",4,"",{"experimente","vehemente","sedimente"}}}; + +{ "Augmentative", 0, 1, {}, + # cansadísimo -> cansad + {"dísimo",5}, + # cansadísima -> cansad + {"dísima",5}, + # amabilísimo -> ama + {"bilísimo",3}, + # amabilísima -> ama + {"bilísima",3}, + # fortísimo -> fort + {"ísimo",3}, + # fortísima -> fort + {"ísima",3}, + # centésimo -> cent + {"ésimo",3}, + # centésima -> cent + {"ésima",3}, + # paupérrimo -> paup + {"érrimo",4}, + # paupérrima -> paup + {"érrima",4}, + # charlatana -> charlat + {"ana",2,"",{"argana","banana","choupana","espadana","faciana","iguana","lantana","macana","membrana","mesana","nirvana","obsidiana","palangana","pavana","persiana","pestana","porcelana","pseudomembrana","roldana","sábana","salangana","saragana","ventana"}}, + # charlatán -> charlat + {"án",3,"",{"ademán","bardán","barregán","corricán","curricán","faisán","furacán","fustán","gabán","gabián","galán","gañán","lavacán","mazán","mourán","rabadán","serán","serrán","tabán","titán","tobogán","verán","volcán","volován"}}, + # homazo -> hom + {"azo",4,"",{"abrazo","espazo","andazo","bagazo","balazo","bandazo","cachazo","carazo","denazo","engazo","famazo","lampreazo","pantocazo","pedazo","preñazo","regazo","ribazo","sobrazo","terrazo","trompazo"}}, + # mulleraza -> muller + {"aza",3,"",{"alcarraza","ameaza","baraza","broucaza","burgaza","cabaza","cachaza","calaza","carpaza","carraza","coiraza","colmaza","fogaza","famaza","labaza","liñaza","melaza","mordaza","paraza","pinaza","rabaza","rapaza","trancaza"}}, + # cascallo -> casc + {"allo",4,"",{"traballo"}}, + # xentalla -> xent + {"alla",4}, + # bocarra -> boc + {"arra",3,"",{"cigarra","cinzarra"}}, + # medicastro -> medic + {"astro",3,"",{"balastro","bimbastro","canastro","retropilastro"}}, + # poetastra -> poet + {"astra",3,"",{"banastra","canastra","contrapilastra","piastra","pilastra"}}, + # corpázio -> corp + {"ázio",3,"",{"topázio"}}, + # soutelo -> sout + {"elo",4,"",{"bacelo","barrelo","bicarelo","biquelo","boquelo","botelo","bouquelo","cacarelo","cachelo","cadrelo","campelo","candelo","cantelo","carabelo","carambelo","caramelo","cercelo","cerebelo","chocarelo","coitelo","conchelo","corbelo","cotobelo","couselo","destelo","desvelo","esfácelo","fandelo","fardelo","farelo","farnelo","flabelo","ganchelo","garfelo","involucelo","mantelo","montelo","outerelo","padicelo","pesadelo","pinguelo","piquelo","rampelo","rastrelo","restelo","tornecelo","trabelo","restrelo","portelo","ourelo","zarapelo"}}, + # avioneta -> avion + 
{"eta",3,"",{"arqueta","atleta","avoceta","baioneta","baldeta","banqueta","barraganeta","barreta","borleta","buceta","caceta","calceta","caldeta","cambeta","canaleta","caneta","carreta","cerceta","chaparreta","chapeta","chareta","chincheta","colcheta","cometa","corbeta","corveta","cuneta","desteta","espeta","espoleta","estafeta","esteta","faceta","falanxeta","frasqueta","gaceta","gabeta","galleta","garabeta","gaveta","glorieta","lagareta","lambeta","lanceta","libreta","maceta","macheta","maleta","malleta","mareta","marreta","meseta","mofeta","muleta","peseta","planeta","raqueta","regreta","saqueta","veleta","vendeta","viñeta"}}, + # guapete -> guap + {"ete",3,"",{"alfinete","ariete","bacinete","banquete","barallete","barrete","billete","binguelete","birrete","bonete","bosquete","bufete","burlete","cabalete","cacahuete","cavinete","capacete","carrete","casarete","casete","chupete","clarinete","colchete","colete","capete","curupete","disquete","estilete","falsete","ferrete","filete","gallardete","gobelete","inglete","machete","miquelete","molete","mosquete","piquete","ribete","rodete","rolete","roquete","sorvete","vedete","vendete"}}, + # práctica -> práct + {"ica",3,"",{"andarica","botánica","botica","dialéctica","dinámica","física","formica","gráfica","marica","túnica"}}, + # práctico -> práct + {"ico",3,"",{"conico","acetifico","acidifico"}}, + # trapexo -> trap + {"exo",3,"",{"arpexo","arquexo","asexo","axexo","azulexo","badexo","bafexo","bocexo","bosquexo","boubexo","cacarexo","carrexo","cascarexo","castrexo","convexo","cotexo","desexo","despexo","forcexo","gabexo","gargarexo","gorgolexo","inconexo","manexo","merexo","narnexo","padexo","patexo","sopexo","varexo"}}, + {"exa",3,"",{"airexa","bandexa","carrexa","envexa","igrexa","larexa","patexa","presexa","sobexa"}}, + # multidão -> mult + {"idão",3}, + # pequeniño -> pequeno + {"iño",3,"o",{"camiño","cariño","comiño","golfiño","padriño","sobriño","viciño","veciño"}}, + # pequeniña -> pequena + {"iña",3,"a",{"camariña","campiña","entreliña","espiña","fariña","moriña","valiña"}}, + # grandito -> grand + {"ito",3,""}, + # grandita -> grand + {"ita",3,""}, + # anomaloide -> animal + {"oide",3,"",{"anaroide","aneroide","asteroide","axoide","cardioide","celuloide","coronoide","discoide","espermatozoide","espiroide","esquizoide","esteroide","glenoide","linfoide","hemorroide","melaloide","sacaroide","tetraploide","varioloide"}}, + # cazola -> caz + {"ola",3,"",{"aixola","ampola","argola","arola","arteríola","bandola","bítola","bractéola","cachola","carambola","carapola","carola","carrandiola","catrapola","cebola","centola","champola","chatola","cirola","cítola","consola","corola","empola","escarola","esmola","estola","fitola","florícola","garañola","gárgola","garxola","glicocola","góndola","mariola","marola","michola","pirola","rebola","rupícola","saxícola","sémola","tachola","tómbola"}}, + # pedrolo -> pedr + {"olo",3,"",{"arrolo","babiolo","cacharolo","caixarolo","carolo","carramolo","cascarolo","cirolo","codrolo","correolo","cotrolo","desconsolo","rebolo","repolo","subsolo","tixolo","tómbolo","torolo","trémolo","vacúolo","xermolo","zócolo"}}, + # vellote -> vell + {"ote",3,"",{"aigote","alcaiote","barbarote","balote","billote","cachote","camarote","capote","cebote","chichote","citote","cocorote","escote","gañote","garrote","gavote","lamote","lapote","larapote","lingote","lítote","magote","marrote","matalote","pandote","paparote","rebote","tagarote","zarrote"}}, + # mozota -> moz + 
{"ota",3,"",{"asíntota","caiota","cambota","chacota","compota","creosota","curota","derrota","díspota","gamota","maniota","pelota","picota","pillota","pixota","queirota","remota"}}, + # gordocho -> gord + {"cho",3,"",{"abrocho","arrocho","carocho","falucho","bombacho","borracho","mostacho"}}, + # gordecha -> gord + {"cha",3,"",{"borracha","carracha","estacha","garnacha","limacha","remolacha","abrocha"}}, + # baratuco -> barat + {"uco",4,"",{"caduco","estuco","fachuco","malluco","saluco","trabuco"}}, + # borrachuzo -> borrach + {"uzo",3,"",{"carriñouzo","fachuzo","mañuzo","mestruzo","tapuzo"}}, + # xentuza -> xent + {"uza",3,"",{"barruza","chamuza","chapuza","charamuza","conduza","deduza","desluza","entreluza","induza","reluza","seduza","traduza","trasluza"}}, + # babuxa -> bab + {"uxa",3,"",{"caramuxa","carrabouxa","cartuxa","coruxa","curuxa","gaturuxa","maruxa","meruxa","miruxa","moruxa","muruxa","papuxa","rabuxa","trouxa"}}, + {"uxo",3,"",{"caramuxo","carouxo","carrabouxo","curuxo","debuxo","ganduxo","influxo","negouxo","pertuxo","refluxo"}}, + # grupello -> grup + {"ello",3,"",{"alborello","artello","botello","cachafello","calello","casarello","cazabello","cercello","cocerello","concello","consello","desparello","escaravello","espello","fedello","fervello","gagafello","gorrobello","nortello","pendello","troupello","trebello"}}, + # pontella -> pont + {"ella",3,"",{"alborella","bertorella","bocatella","botella","calella","cercella","gadella","grosella","lentella","movella","nocella","noitevella","parella","pelella","percebella","segorella","sabella"}}}; + +{ "Noun", 0, 0, {}, + # lealdade -> leal + {"dade",3,"",{"acridade","calidade"}}, + # clarificar -> clar + {"ificar",2}, + # brasileiro->brasil + {"eiro",3,"",{"agoireiro","bardalleiro","braseiro","barreiro","canteiro","capoeiro","carneiro","carteiro","cinceiro","faroleiro","mareiro","preguiceiro","quinteiro","raposeiro","retranqueiro","regueiro","sineiro","troleiro","ventureiro"}}, + # marisqueira -> marisqu + {"eira",3,"",{"cabeleira","canteira","cocheira","folleira","milleira"}}, + # hospitalario -> hospital + {"ario",3,"",{"armario","calcario","lionario","salario"}}, + # bibliotecaria -> bibliotec + {"aria",3,"",{"cetaria","coronaria","fumaria","linaria","lunaria","parietaria","saponaria","serpentaria"}}, + # humorístico -> humor + {"ístico",3,"",{"balístico", "ensaístico"}}, + # castrista -> castr + {"ista",3,"",{"batista","ciclista","fadista","operista","tenista","verista"}}, + # lavado -> lav + {"ado",2,"",{"grado","agrado"}}, + # decanato -> decan + {"ato",2,"",{"agnato"}}, + # xemido -> xem + {"ido",3,"",{"cándido","cândido","consolido","decidido","duvido","marido","rápido"}}, + # mantida -> mant + {"ida",3,"",{"bastida","dúbida","dubida","duvida","ermida","éxida","guarida","lapicida","medida","morida"}}, + {"ída",3}, + # mantído -> mant + {"ido",3}, + # orelludo -> orell + {"udo",3,"",{"estudo","escudo"}}, + # orelluda -> orell + {"uda",3}, + {"ada",3,"",{"abada","alhada","allada","pitada"}}, + # comedela -> come + {"dela",3,"",{"cambadela","cavadela","forcadela","erisipidela","mortadela","espadela","fondedela","picadela","arandela","candela","cordela","escudela","pardela"}}, + # fontela -> font + 
{"ela",3,"",{"canela","capela","cotela","cubela","curupela","escarapela","esparrela","estela","fardela","flanela","fornela","franela","gabela","gamela","gavela","glumela","granicela","lamela","lapela","malvela","manela","manganela","mexarela","micela","mistela","novela","ourela","panela","parcela","pasarela","patamela","patela","paxarela","pipela","pitela","postela","pubela","restela","sabela","salmonela","secuela","sentinela","soldanela","subela","temoncela","tesela","tixela","tramela","trapela","varela","vitela","xanela","xestela"}}, + # agradábel -> agrad + {"ábel",2,"",{"afábel","fiábel"}}, + # combustíbel -> combust + {"íbel",2,"",{"críbel","imposíbel","posíbel","fisíbel","falíbel"}}, + # fabricante -> frabrica + {"nte",3,"",{"alimente","adiante","acrescente","elefante","frequente","freqüente","gigante","instante","oriente","permanente","posante","possante","restaurante"}}, + # ignorancia -> ignora + {"ncia",3}, + # temperanza -> tempera + {"nza",3}, + {"acia",3,"",{"acracia","audacia","falacia","farmacia"}}, + # inmundicia -> inmund + {"icia",3,"",{"caricia","delicia","ledicia","malicia","milicia","noticia","pericia","presbicia","primicia","regalicia","sevicia","tiricia"}}, + # xustiza -> xust + {"iza",3,"",{"alvariza","baliza","cachiza","caniza","cañiza","carbaliza","carriza","chamariza","chapiza","fraguiza","latiza","longaniza","mañiza","nabiza","peliza","preguiza","rabiza"}}, + # clarexar -> clar + {"exar",3,"",{"palmexar"}}, + # administración -> administr + {"ación",2,"",{"aeración"}}, + # expedición -> exped + {"ición",3,"",{"condición","gornición","monición","nutrición","petición","posición","sedición","volición"}}, + # excepción -> except + {"ción",3,"t"}, + # comprensión -> comprens + {"sión",3,"s",{"abrasión", "alusión"}}, + # doazón -> do + {"azón",2,"",{"armazón"}}, + # garrafón -> garraf + {"ón",3,"",{"abalón","acordeón","alción","aldrabón","alerón","aliñón","ambón","bombón","calzón","campón","canalón","cantón","capitón","cañón","centón","ciclón","collón","colofón","copón","cotón","cupón","petón","tirón","tourón","turón","unción","versión","zubón","zurrón"}}, + # lambona -> lamb + {"ona",3,"",{"abandona","acetona","aleurona","amazona","anémona","bombona","cambona","carona","chacona","charamona","cincona","condona","cortisona","cretona","cretona","detona","estona","fitohormona","fregona","gerona","hidroquinona","hormona","lesiona","madona","maratona","matrona","metadona","monótona","neurona","pamplona","peptona","poltrona","proxesterona","quinona","quinona","silicona","sulfona"}}, + # bretoa -> bretón + {"oa",3,"",{"abandoa","madroa","barbacoa","estoa","airoa","eiroa","amalloa","ámboa","améndoa","anchoa","antinéboa","avéntoa","avoa","bágoa","balboa","bisavoa","boroa","canoa","caroa","comadroa","coroa","éngoa","espácoa","filloa","fírgoa","grañoa","lagoa","lanzoa","magoa","mámoa","morzoa","noiteboa","noraboa","parañoa","persoa","queiroa","rañoa","táboa","tataravoa","teiroa"}}, + # demoníaco -> demoní + {"aco",3}, + # demoníaca -> demoní + {"aca",3,"",{"alpaca","barraca","bullaca","buraca","carraca","casaca","cavaca","cloaca","entresaca","ervellaca","espinaca","estaca","farraca","millaca","pastinaca","pataca","resaca","urraca","purraca"}}, + # carballal -> carball + {"al",4,"",{"afinal","animal","estatal","bisexual","bissexual","desleal","fiscal","formal","pessoal","persoal","liberal","postal","virtual","visual","pontual","puntual","homosexual","heterosexual"}}, + # nadador -> nada + {"dor",2,"",{"abaixador"}}, + # benfeitor -> benfei + 
{"tor",3,"",{"autor","motor","pastor","pintor"}}, + # produtor -> produt + {"or",2,"",{"asesor","assessor","favor","mellor","melhor","redor","rigor","sensor","tambor","tumor"}}, + # profesora -> profes + {"ora",3,"",{"albacora","anáfora","áncora","apisoadora","ardora","ascospora","aurora","avéspora","bitácora","canéfora","cantimplora","catáfora","cepilladora","demora","descalcificadora","diáspora","empacadora","epífora","ecavadora","escora","eslora","espora","fotocompoñedora","fotocopiadora","grampadora","isícora","lavadora","lixadora","macrospora","madrépora","madrágora","masora","mellora","metáfora","microspora","milépora","milpéndora","nécora","oospora","padeadora","pasiflora","pécora","píldora","pólvora","ratinadora","rémora","retroescavadora","sófora","torradora","trémbora","uredospora","víbora","víncora","zoospora"}}, + # zapataría -> zapat + {"aría",3,"",{"libraría"}}, + # etiquetaxe -> etiquet + {"axe",3,"",{"aluaxe","amaraxe","amperaxe","bagaxe","balaxe","barcaxe","borraxe","bescaxe","cabotaxe","carraxe","cartilaxe","chantaxe","colaxe","coraxe","carruaxe","dragaxe","embalaxe","ensilaxe","epistaxe","fagundaxe","fichaxe","fogaxe","forraxe","fretaxe","friaxe","garaxe","homenaxe","leitaxe","liñaxe","listaxe","maraxe","marcaxe","maridaxe","masaxe","miraxe","montaxe","pasaxe","peaxe","portaxe","ramaxe","rebelaxe","rodaxe","romaxe","sintaxe","sondaxe","tiraxe","vantaxe","vendaxe","viraxe"}}, + # movedizo -> move + {"dizo",3}, + # limpeza -> limp + {"eza",3,"",{"alteza","beleza","fereza","fineza","vasteza","vileza"}}, + # rixidez -> rixid + {"ez",3,"",{"acidez","adultez","adustez","avidez","candidez","mudez","nenez","nudez","pomez"}}, + # mullerengo -> muller + {"engo",3}, + # chairego -> chair + {"ego",3,"",{"corego","derrego","entrego","lamego","sarego","sartego"}}, + # cariñoso -> cariñ + {"oso",3,"",{"afanoso","algoso","caldoso","caloso","cocoso","ditoso","favoso","fogoso","lamoso","mecoso","mocoso","precioso","rixoso","venoso","viroso","xesoso"}}, + # cariñosa -> cariñ + {"osa",3,"",{"mucosa","glicosa","baldosa","celulosa","isoglosa","nitrocelulosa","levulosa","ortosa","pectosa","preciosa","sacarosa","serosa","ventosa"}}, + # negrume -> negr + {"ume",3,"",{"agrume","albume","alcume","batume","cacume","cerrume","chorume","churume","costume","curtume","estrume","gafume","legume","perfume","queixume","zarrume"}}, + # altura -> alt + {"ura",3,"",{"albura","armadura","imatura","costura"}}, + # cuspiñar -> cusp + {"iñar",3}, + # febril -> febr + {"il",3,"",{"abril","alfil","anil","atril","badil","baril","barril","brasil","cadril","candil","cantil","carril","chamil","chancil","civil","cubil","dátil","difícil","dócil","edil","estéril","fácil","fráxil","funil","fusil","grácil","gradil","hábil","hostil","marfil"}}, + # principesco -> princip + {"esco",4}, + # mourisco -> mour + {"isco",4}, + # esportivo -> esport + {"ivo",3,"",{"pasivo","positivo","passivo","possessivo","posesivo","pexotarivo","relativo"}}}; + +{ "Verb", 0, 0, {}, + # amaba -> am + {"aba",2}, + # andabade -> and + {"abade",2}, + # andábade -> and + {"ábade",2}, + # chorabamo -> chor + {"abamo",2}, + # chorábamo -> chor + {"ábamo",2}, + # moraban -> morab + {"aban",2}, + # andache -> and + {"ache",2}, + # andade -> and + {"ade",2}, + {"an",2}, + # cantando -> cant + {"ando",2}, + # cantar -> cant + {"ar",2,"",{"azar","bazar","patamar"}}, + # lembrarade -> lembra + {"arade",2}, + {"aramo",2}, + {"arán",2}, + # cantaran -> cant + {"aran",2}, + # convidárade -> convid + {"árade",2}, + # convidaría -> convid + {"aría",2}, + # 
cantariade -> cant + {"ariade",2}, + # cantaríade -> cant + {"aríade",2}, + # cantarian -> cant + {"arian",2}, + # cantariamo -> cant + {"ariamo",2}, + # pescaron -> pesc + {"aron",2}, + # cantase -> cant + {"ase",2}, + # cantasede -> cant + {"asede",2}, + # cantásede -> cant + {"ásede",2}, + # cantasemo -> cant + {"asemo",2}, + # cantásemo -> cant + {"ásemo",2}, + # cantasen -> cant + {"asen",2}, + # loitavan -> loitav + {"avan",2}, + # cantaríamo -> cant + {"aríamo",2}, + # cantassen -> cant + {"assen",2}, + # cantássemo -> cant + {"ássemo",2}, + # beberíamo -> beb + {"eríamo",2}, + # bebêssemo -> beb + {"êssemo",2}, + # partiríamo -> part + {"iríamo",3}, + # partíssemo -> part + {"íssemo",3}, + # cantáramo -> cant + {"áramo",2}, + # cantárei -> cant + {"árei",2}, + # cantaren -> cant + {"aren",2}, + # cantaremo -> cant + {"aremo",2}, + # cantaríei -> cant + {"aríei",2}, + {"ássei",2}, + # cantávamo-> cant + {"ávamo",2}, + # bebêramo -> beb + {"êramo",1}, + # beberemo -> beb + {"eremo",1}, + # beberíei -> beb + {"eríei",1}, + # bebêssei -> beb + {"êssei",1}, + # partiríamo -> part + {"íramo",3}, + # partiremo -> part + {"iremo",3}, + # partiríei -> part + {"iríei",3}, + # partíssei -> part + {"íssei",3}, + # partissen -> part + {"issen",3}, + # bebendo -> beb + {"endo",1}, + # partindo -> part + {"indo",3}, + # propondo -> prop + {"ondo",3}, + # cantarde -> cant + {"arde",2}, + # cantarei -> cant + {"arei",2}, + # cantaria -> cant + {"aria",2}, + # cantarmo -> cant + {"armo",2}, + # cantasse -> cant + {"asse",2}, + {"aste",2}, + # cantávei -> cant + {"ávei",2}, + # perderão -> perd + {"erão",1}, + # beberde -> beb + {"erde",1}, + # beberei -> beb + {"erei",1}, + # bebêrei -> beb + {"êrei",1}, + # beberen -> beb + {"eren",2}, + # beberia -> beb + {"eria",1}, + # bebermo -> beb + {"ermo",1}, + # bebeste -> beb + {"este",1,"",{"faroeste","agreste"}}, + # bebíamo -> beb + {"íamo",1}, + # fuxian -> fux + {"ian",2,"",{"enfian","eloxian","ensaian"}}, + # partirde -> part + {"irde",2}, + # partírei -> part + {"irei",3,"",{"admirei"}}, + # partiren -> part + {"iren",3}, + # partiria -> part + {"iria",3}, + # partirmo -> part + {"irmo",3}, + # partisse -> part + {"isse",3}, + # partiste -> part + {"iste",4}, + {"iava",1,"",{"ampliava"}}, + # cantamo -> cant + {"amo",2}, + # funciona -> func + {"iona",3}, + # cantara -> cant + {"ara",2,"",{"arara","prepara"}}, + # enviará -> envi + {"ará",2,"",{"alvará","bacará"}}, + # cantare -> cant + {"are",2,"",{"prepare"}}, + # cantava -> cant + {"ava",2,"",{"agrava"}}, + # cantemo -> cant + {"emo",2}, + # bebera -> beb + {"era",1,"",{"acelera","espera"}}, + # beberá -> beb + {"erá",1}, + # bebere -> beb + {"ere",1,"",{"espere"}}, + # bebíei -> beb + {"íei",1}, + # metin -> met + {"in",3}, + # partimo -> part + {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}}, + # partira -> part + {"ira",3,"",{"fronteira","sátira"}}, + {"ído",3}, + # partirá -> part + {"irá",3}, + # concretizar -> concret + {"tizar",4,"",{"alfabetizar"}}, + {"izar",3,"",{"organizar"}}, + # saltitar -> salt + {"itar",5,"",{"acreditar","explicitar","estreitar"}}, + # partire -> part + {"ire",3,"",{"adquire"}}, + # compomo -> comp + {"omo",3}, + {"ai",2}, + # barbear -> barb + {"ear",4,"",{"alardear","nuclear"}}, + # cheguei -> cheg + {"uei",3}, + {"uía",5,"u"}, + # cantei -> cant + {"ei",3}, + # beber -> beb + {"er",1,"",{"éter","pier"}}, + # bebeu -> beb + {"eu",1,"",{"chapeu"}}, + # bebia -> beb + 
{"ia",1,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}}, + # partir -> part + {"ir",3}, + # partiu -> part + {"iu",3}, + # fraqueou -> fraqu + {"eou",5}, + # chegou -> cheg + {"ou",3}, + # bebi -> beb + {"i",1}, + # varrede -> varr + {"ede",1,"",{"rede","bípede","céspede","parede","palmípede","vostede","hóspede","adrede"}}, + # cantei -> cant + {"ei",3}, + # anden -> and + {"en",2}, + # descerade -> desc + {"erade",1}, + # vivérade -> viv + {"érade",1}, + # beberan -> beb + {"eran",2}, + # colleramo -> coller + {"eramo",1}, + # bebéramo -> beb + {"éramo",1}, + # perderán -> perd + {"erán",1}, + # varrería -> varr + {"ería",1}, + # beberiade -> beb + {"eriade",1}, + # beberíade -> beb + {"eríade",1}, + # beberiamo -> beb + {"eriamo",1}, + # beberian -> beb + {"erian",1}, + # beberían -> beb + {"erían",1}, + # perderon -> perd + {"eron",1}, + # bebese -> beb + {"ese",1}, + # bebesedes -> beb + {"esedes",1}, + # bebésedes -> beb + {"ésedes",1}, + # bebesemo -> beb + {"esemo",1}, + # bebésemo -> beb + {"ésemo",1}, + # bebesen -> beb + {"esen",1}, + # bebêssede -> beb + {"êssede",1}, + # chovía -> chov + {"ía",1}, + # faciade -> fac + {"iade",1}, + # facíade -> fac + {"íade",1}, + # perdiamo -> perd + {"iamo",1}, + # fuxían -> fux + {"ían",1}, + # corriche -> corr + {"iche",1}, + # partide -> part + {"ide",1}, + # escribirade -> escrib + {"irade",3}, + # parírade -> par + {"írade",3}, + # partiramo -> part + {"iramo",3}, + # fugirán -> fug + {"irán",3}, + # viviría -> viv + {"iría",3}, + # partiriade -> part + {"iriade",3}, + # partiríade -> part + {"iríade",3}, + # partiriamo -> part + {"iriamo",3}, + # partirian -> part + {"irian",3}, + # partirían -> part + {"irían",3}, + # reflectiron -> reflect + {"iron",3}, + # partise -> part + {"ise",3}, + # partisede -> part + {"isede",3}, + # partísede -> part + {"ísede",3}, + # partisemo -> part + {"isemo",3}, + # partísemo -> part + {"ísemo",3}, + # partisen -> part + {"isen",3}, + # partíssede -> part + {"íssede",3}, + {"tizar",3,"",{"alfabetizar"}}, + {"ondo",3}}; + +{ "Vowel", 0, 0, {}, + # segue -> seg + {"gue",2,"g",{"azougue","dengue","merengue","nurague","merengue","rengue"}}, + {"que",2,"c",{"alambique","albaricoque","abaroque","alcrique","almadraque","almanaque","arenque","arinque","baduloque","ballestrinque","betoque","bivaque","bloque","bodaque","bosque","breque","buque","cacique","cheque","claque","contradique","coque","croque","dique","duque","enroque","espeque","estoque","estoraque","estraloque","estrinque","milicroque","monicreque","orinque","arinque","palenque","parque","penique","picabeque","pique","psique","raque","remolque","xeque","repenique","roque","sotobosque","tabique","tanque","toque","traque","truque","vivaque","xaque"}}, + {"a",3,"",{"amasadela","cerva"}}, + {"e",3,"",{"marte"}}, + {"o",3,"",{"barro","fado","cabo","libro","cervo"}}, + {"â",3}, + {"ã",3,"",{"amanhã","arapuã","fã","divã","manhã"}}, + {"ê",3}, + {"ô",3}, + {"á",3}, + {"é",3}, + {"ó",3}, + # munxi -> munx + {"i",3}}; diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo 
+con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp b/modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp new file mode 100644 index 00000000000..24de0653803 --- /dev/null +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp @@ -0,0 +1,456 @@ +# Steps file for the RSLP stemmer. + +# Step 1: Plural Reduction +{ "Plural", 3, 1, {"s"}, + # bons -> bom + {"ns",1,"m"}, + # balões -> balão + {"ões",3,"ão"}, + # capitães -> capitão + {"ães",1,"ão",{"mães"}}, + # normais -> normal + {"ais",1,"al",{"cais","mais"}}, + # papéis -> papel + {"éis",2,"el"}, + # amáveis -> amável + {"eis",2,"el"}, + # lençóis -> lençol + {"óis",2,"ol"}, + # barris -> barril + {"is",2,"il",{"lápis","cais","mais","crúcis","biquínis","pois","depois","dois","leis"}}, + # males -> mal + {"les",3,"l"}, + # mares -> mar + {"res",3,"r", {"árvores"}}, + # casas -> casa + {"s",2,"",{"aliás","pires","lápis","cais","mais","mas","menos","férias","fezes","pêsames","crúcis","gás","atrás","moisés","através","convés","ês","país","após","ambas","ambos","messias", "depois"}}}; + +# Step 2: Adverb Reduction +{ "Adverb", 0, 0, {}, + # felizmente -> feliz + {"mente",4,"",{"experimente"}}}; + +# Step 3: Feminine Reduction +{ "Feminine", 3, 1, {"a","ã"}, + # chefona -> chefão + {"ona",3,"ão",{"abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","carona"}}, + # vilã -> vilão + {"ã",2,"ão",{"amanhã","arapuã","fã","divã"}}, + # professora -> professor + {"ora",3,"or"}, + # americana -> americano + {"na",4,"no",{"carona","abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","guiana","campana","grana","caravana","banana","paisana"}}, + # sozinha -> sozinho + {"inha",3,"inho",{"rainha","linha","minha"}}, + # inglesa -> inglês + {"esa",3,"ês",{"mesa","obesa","princesa","turquesa","ilesa","pesa","presa"}}, + # famosa -> famoso + {"osa",3,"oso",{"mucosa","prosa"}}, + # maníaca -> maníaco + {"íaca",3,"íaco"}, + # prática -> prático + {"ica",3,"ico",{"dica"}}, + # cansada -> cansado + {"ada",2,"ado",{"pitada"}}, + # mantida -> mantido + {"ida",3,"ido",{"vida","dúvida"}}, + {"ída",3,"ido",{"recaída","saída"}}, + # prima -> primo + {"ima",3,"imo",{"vítima"}}, + # passiva -> passivo + {"iva",3,"ivo",{"saliva","oliva"}}, + # primeira -> primeiro + {"eira",3,"eiro",{"beira","cadeira","frigideira","bandeira","feira","capoeira","barreira","fronteira","besteira","poeira"}}}; + +# Step 4: Augmentative/Diminutive Reduction +{ "Augmentative", 0, 1, {}, + # cansadíssimo -> cansad + {"díssimo",5}, + # amabilíssimo -> ama + {"abilíssimo",5}, + # fortíssimo -> fort + {"íssimo",3}, + {"ésimo",3}, + # 
chiquérrimo -> chiqu + {"érrimo",4}, + # pezinho -> pe + {"zinho",2}, + # maluquinho -> maluc + {"quinho",4,"c"}, + # amiguinho -> amig + {"uinho",4}, + # cansadinho -> cansad + {"adinho",3}, + # carrinho -> carr + {"inho",3,"",{"caminho","cominho"}}, + # grandalhão -> grand + {"alhão",4}, + # dentuça -> dent + {"uça",4}, + # ricaço -> ric + {"aço",4,"",{"antebraço"}}, + {"aça",4}, + # casadão -> cans + {"adão",4}, + {"idão",4}, + # corpázio -> corp + {"ázio",3,"",{"topázio"}}, + # pratarraz -> prat + {"arraz",4}, + {"zarrão",3}, + {"arrão",4}, + # bocarra -> boc + {"arra",3}, + # calorzão -> calor + {"zão",2,"",{"coalizão"}}, + # meninão -> menin + {"ão",3,"",{"camarão","chimarrão","canção","coração","embrião","grotão","glutão","ficção","fogão","feição","furacão","gamão","lampião","leão","macacão","nação","órfão","orgão","patrão","portão","quinhão","rincão","tração","falcão","espião","mamão","folião","cordão","aptidão","campeão","colchão","limão","leilão","melão","barão","milhão","bilhão","fusão","cristão","ilusão","capitão","estação","senão"}}}; + +# Step 5: Noun Suffix Reduction +{ "Noun", 0, 0, {}, + # existencialista -> exist + {"encialista",4}, + # minimalista -> minim + {"alista",5}, + # contagem -> cont + {"agem",3,"",{"coragem","chantagem","vantagem","carruagem"}}, + # gerenciamento -> gerenc + {"iamento",4}, + # monitoramento -> monitor + {"amento",3,"",{"firmamento","fundamento","departamento"}}, + # nascimento -> nasc + {"imento",3}, + {"mento",6,"",{"firmamento","elemento","complemento","instrumento","departamento"}}, + # comercializado -> comerci + {"alizado",4}, + # traumatizado -> traum + {"atizado",4}, + {"tizado",4,"",{"alfabetizado"}}, + # alfabetizado -> alfabet + {"izado",5,"",{"organizado","pulverizado"}}, + # associativo -> associ + {"ativo",4,"",{"pejorativo","relativo"}}, + # contraceptivo -> contracep + {"tivo",4,"",{"relativo"}}, + # esportivo -> esport + {"ivo",4,"",{"passivo","possessivo","pejorativo","positivo"}}, + # abalado -> abal + {"ado",2,"",{"grado"}}, + # impedido -> imped + {"ido",3,"",{"cândido","consolido","rápido","decido","tímido","duvido","marido"}}, + # ralador -> ral + {"ador",3}, + # entendedor -> entend + {"edor",3}, + # cumpridor -> cumpr + {"idor",4,"",{"ouvidor"}}, + {"dor",4,"",{"ouvidor"}}, + {"sor",4,"",{"assessor"}}, + {"atoria",5}, + {"tor",3,"",{"benfeitor","leitor","editor","pastor","produtor","promotor","consultor"}}, + {"or",2,"",{"motor","melhor","redor","rigor","sensor","tambor","tumor","assessor","benfeitor","pastor","terior","favor","autor"}}, + # comparabilidade -> compar + {"abilidade",5}, + # abolicionista -> abol + {"icionista",4}, + # intervencionista -> interven + {"cionista",5}, + {"ionista",5}, + {"ionar",5}, + # profissional -> profiss + {"ional",4}, + # referência -> refer + {"ência",3}, + # repugnância -> repugn + {"ância",4,"",{"ambulância"}}, + # abatedouro -> abat + {"edouro",3}, + # fofoqueiro -> fofoc + {"queiro",3,"c"}, + {"adeiro",4,"",{"desfiladeiro"}}, + # brasileiro -> brasil + {"eiro",3,"",{"desfiladeiro","pioneiro","mosteiro"}}, + {"uoso",3}, + # gostoso -> gost + {"oso",3,"",{"precioso"}}, + # comercializaç -> comerci + {"alizaç",5}, + {"atizaç",5}, + {"tizaç",5}, + {"izaç",5,"",{"organizaç"}}, + # alegaç -> aleg + {"aç",3,"",{"equaç","relaç"}}, + # aboliç -> abol + {"iç",3,"",{"eleiç"}}, + # anedotário -> anedot + {"ário",3,"",{"voluntário","salário","aniversário","diário","lionário","armário"}}, + {"atório",3}, + 
{"rio",5,"",{"voluntário","salário","aniversário","diário","compulsório","lionário","próprio","stério","armário"}}, + # ministério -> minist + {"ério",6}, + # chinês -> chin + {"ês",4}, + # beleza -> bel + {"eza",3}, + # rigidez -> rigid + {"ez",4}, + # parentesco -> parent + {"esco",4}, + # ocupante -> ocup + {"ante",2,"",{"gigante","elefante","adiante","possante","instante","restaurante"}}, + # bombástico -> bomb + {"ástico",4,"",{"eclesiástico"}}, + {"alístico",3}, + {"áutico",4}, + {"êutico",4}, + {"tico",3,"",{"político","eclesiástico","diagnostico","prático","doméstico","diagnóstico","idêntico","alopático","artístico","autêntico","eclético","crítico","critico"}}, + # polêmico -> polêm + {"ico",4,"",{"tico","público","explico"}}, + # produtividade -> produt + {"ividade",5}, + # profundidade -> profund + {"idade",4,"",{"autoridade","comunidade"}}, + # aposentadoria -> aposentad + {"oria",4,"",{"categoria"}}, + # existencial -> exist + {"encial",5}, + # artista -> art + {"ista",4}, + {"auta",5}, + # maluquice -> maluc + {"quice",4,"c"}, + # chatice -> chat + {"ice",4,"",{"cúmplice"}}, + # demoníaco -> demon + {"íaco",3}, + # decorrente -> decorr + {"ente",4,"",{"freqüente","alimente","acrescente","permanente","oriente","aparente"}}, + {"ense",5}, + # criminal -> crim + {"inal",3}, + # americano -> americ + {"ano",4}, + # amável -> am + {"ável",2,"",{"afável","razoável","potável","vulnerável"}}, + # combustível -> combust + {"ível",3,"",{"possível"}}, + {"vel",5,"",{"possível","vulnerável","solúvel"}}, + {"bil",3,"vel"}, + # cobertura -> cobert + {"ura",4,"",{"imatura","acupuntura","costura"}}, + {"ural",4}, + # consensual -> consens + {"ual",3,"",{"bissexual","virtual","visual","pontual"}}, + # mundial -> mund + {"ial",3}, + # experimental -> experiment + {"al",4,"",{"afinal","animal","estatal","bissexual","desleal","fiscal","formal","pessoal","liberal","postal","virtual","visual","pontual","sideral","sucursal"}}, + {"alismo",4}, + {"ivismo",4}, + {"ismo",3,"",{"cinismo"}}}; + +# Step 6: Verb Suffix Reduction +{ "Verb", 0, 0, {}, + # cantaríamo -> cant + {"aríamo",2}, + # cantássemo -> cant + {"ássemo",2}, + # beberíamo -> beb + {"eríamo",2}, + # bebêssemo -> beb + {"êssemo",2}, + # partiríamo -> part + {"iríamo",3}, + # partíssemo -> part + {"íssemo",3}, + # cantáramo -> cant + {"áramo",2}, + # cantárei -> cant + {"árei",2}, + # cantaremo -> cant + {"aremo",2}, + # cantariam -> cant + {"ariam",2}, + # cantaríei -> cant + {"aríei",2}, + # cantássei -> cant + {"ássei",2}, + # cantassem -> cant + {"assem",2}, + # cantávamo -> cant + {"ávamo",2}, + # bebêramo -> beb + {"êramo",3}, + # beberemo -> beb + {"eremo",3}, + # beberiam -> beb + {"eriam",3}, + # beberíei -> beb + {"eríei",3}, + # bebêssei -> beb + {"êssei",3}, + # bebessem -> beb + {"essem",3}, + # partiríamo -> part + {"íramo",3}, + # partiremo -> part + {"iremo",3}, + # partiriam -> part + {"iriam",3}, + # partiríei -> part + {"iríei",3}, + # partíssei -> part + {"íssei",3}, + # partissem -> part + {"issem",3}, + # cantando -> cant + {"ando",2}, + # bebendo -> beb + {"endo",3}, + # partindo -> part + {"indo",3}, + # propondo -> prop + {"ondo",3}, + # cantaram -> cant + {"aram",2}, + {"arão",2}, + # cantarde -> cant + {"arde",2}, + # cantarei -> cant + {"arei",2}, + # cantarem -> cant + {"arem",2}, + # cantaria -> cant + {"aria",2}, + # cantarmo -> cant + {"armo",2}, + # cantasse -> cant + {"asse",2}, + # cantaste -> cant + {"aste",2}, + # cantavam -> cant + {"avam",2,"",{"agravam"}}, + # cantávei -> cant + {"ávei",2}, + # beberam 
-> beb + {"eram",3}, + {"erão",3}, + # beberde -> beb + {"erde",3}, + # beberei -> beb + {"erei",3}, + # bebêrei -> beb + {"êrei",3}, + # beberem -> beb + {"erem",3}, + # beberia -> beb + {"eria",3}, + # bebermo -> beb + {"ermo",3}, + # bebesse -> beb + {"esse",3}, + # bebeste -> beb + {"este",3,"",{"faroeste","agreste"}}, + # bebíamo -> beb + {"íamo",3}, + # partiram -> part + {"iram",3}, + # concluíram -> conclu + {"íram",3}, + {"irão",2}, + # partirde -> part + {"irde",2}, + # partírei -> part + {"irei",3,"",{"admirei"}}, + # partirem -> part + {"irem",3,"",{"adquirem"}}, + # partiria -> part + {"iria",3}, + # partirmo -> part + {"irmo",3}, + # partisse -> part + {"isse",3}, + # partiste -> part + {"iste",4}, + {"iava",4,"",{"ampliava"}}, + # cantamo -> cant + {"amo",2}, + {"iona",3}, + # cantara -> cant + {"ara",2,"",{"arara","prepara"}}, + # cantará -> cant + {"ará",2,"",{"alvará"}}, + # cantare -> cant + {"are",2,"",{"prepare"}}, + # cantava -> cant + {"ava",2,"",{"agrava"}}, + # cantemo -> cant + {"emo",2}, + # bebera -> beb + {"era",3,"",{"acelera","espera"}}, + # beberá -> beb + {"erá",3}, + # bebere -> beb + {"ere",3,"",{"espere"}}, + # bebiam -> beb + {"iam",3,"",{"enfiam","ampliam","elogiam","ensaiam"}}, + # bebíei -> beb + {"íei",3}, + # partimo -> part + {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}}, + # partira -> part + {"ira",3,"",{"fronteira","sátira"}}, + {"ído",3}, + # partirá -> part + {"irá",3}, + {"tizar",4,"",{"alfabetizar"}}, + {"izar",5,"",{"organizar"}}, + {"itar",5,"",{"acreditar","explicitar","estreitar"}}, + # partire -> part + {"ire",3,"",{"adquire"}}, + # compomo -> comp + {"omo",3}, + # cantai -> cant + {"ai",2}, + # cantam -> cant + {"am",2}, + # barbear -> barb + {"ear",4,"",{"alardear","nuclear"}}, + # cantar -> cant + {"ar",2,"",{"azar","bazaar","patamar"}}, + # cheguei -> cheg + {"uei",3}, + {"uía",5,"u"}, + # cantei -> cant + {"ei",3}, + {"guem",3,"g"}, + # cantem -> cant + {"em",2,"",{"alem","virgem"}}, + # beber -> beb + {"er",2,"",{"éter","pier"}}, + # bebeu -> beb + {"eu",3,"",{"chapeu"}}, + # bebia -> beb + {"ia",3,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}}, + # partir -> part + {"ir",3,"",{"freir"}}, + # partiu -> part + {"iu",3}, + {"eou",5}, + # chegou -> cheg + {"ou",3}, + # bebi -> beb + {"i",3}}; + +# Step 7: Vowel Removal +{ "Vowel", 0, 0, {}, + {"bil",2,"vel"}, + {"gue",2,"g",{"gangue","jegue"}}, + {"á",3}, + {"ê",3,"",{"bebê"}}, + # menina -> menin + {"a",3,"",{"ásia"}}, + # grande -> grand + {"e",3}, + # menino -> menin + {"o",3,"",{"ão"}}}; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java index 604f9668d53..f1af45ab350 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java @@ -169,7 +169,7 @@ public class HTMLStripCharFilterTest extends LuceneTestCase { public void testBufferOverflow() throws Exception { StringBuilder testBuilder = new StringBuilder(HTMLStripCharFilter.DEFAULT_READ_AHEAD + 50); - testBuilder.append("ah "); + testBuilder.append("ah ??????"); appendChars(testBuilder, HTMLStripCharFilter.DEFAULT_READ_AHEAD + 500); processBuffer(testBuilder.toString(), "Failed on pseudo proc. 
instr.");//processing instructions diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index cff13e3b68d..5c7a46f0461 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -201,4 +201,10 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0(); wordBreakTest.test(a); } + + public void testSupplementary() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱éŸä‡¹æ„¯ç€›", + new String[] {"𩬅", "艱", "éŸ", "䇹", "愯", "瀛"}, + new String[] { "", "", "", "", "", "" }); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index 7111920d895..71b37361789 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -400,4 +400,10 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0(); wordBreakTest.test(a); } + + public void testSupplementary() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱éŸä‡¹æ„¯ç€›", + new String[] {"𩬅", "艱", "éŸ", "䇹", "愯", "瀛"}, + new String[] { "", "", "", "", "", "" }); + } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java index 1b95c29b31a..8b0192e1555 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java @@ -1,5 +1,22 @@ package org.apache.lucene.analysis.el; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java new file mode 100644 index 00000000000..b67bf087713 --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java @@ -0,0 +1,53 @@ +package org.apache.lucene.analysis.gl; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; + +public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { + /** This test fails with NPE when the + * stopwords file is missing in classpath */ + public void testResourcesAvailable() { + new GalicianAnalyzer(TEST_VERSION_CURRENT); + } + + /** test stopwords and stemming */ + public void testBasics() throws IOException { + Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT); + // stemming + checkOneTermReuse(a, "correspondente", "correspond"); + checkOneTermReuse(a, "corresponderá", "correspond"); + // stopword + assertAnalyzesTo(a, "e", new String[] {}); + } + + /** test use of exclusion set */ + public void testExclude() throws IOException { + Set exclusionSet = new HashSet(); + exclusionSet.add("correspondente"); + Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT, + GalicianAnalyzer.getDefaultStopSet(), exclusionSet); + checkOneTermReuse(a, "correspondente", "correspondente"); + checkOneTermReuse(a, "corresponderá", "correspond"); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java new file mode 100644 index 00000000000..85f0efbdf1d --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java @@ -0,0 +1,52 @@ +package org.apache.lucene.analysis.gl; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
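
The testSupplementary cases added above push codepoints outside the Basic Multilingual Plane through StandardTokenizer and UAX29URLEmailTokenizer; each expected token is a single codepoint that Java stores as a two-char surrogate pair, which is exactly what a tokenizer gets wrong if it walks the input char by char. A small self-contained snippet (illustrative only, not part of the patch) showing the distinction:

public class SupplementaryDemo {
  public static void main(String[] args) {
    String s = "𩬅";                                       // first ideograph from the test input above
    System.out.println(s.length());                        // 2: stored as a UTF-16 surrogate pair
    System.out.println(s.codePointCount(0, s.length()));   // 1: a single codepoint
    // A Unicode-aware tokenizer must step by codepoint, not by char:
    for (int i = 0; i < s.length(); i += Character.charCount(s.codePointAt(i))) {
      System.out.printf("codepoint U+%X%n", s.codePointAt(i));
    }
  }
}
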
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link GalicianStemFilter} + */ +public class TestGalicianStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); + return new TokenStreamComponents(source, new GalicianStemFilter(result)); + } + }; + + + /** Test against a vocabulary from the reference impl */ + public void testVocabulary() throws IOException { + assertVocabulary(analyzer, getDataFile("gltestdata.zip"), "gl.txt"); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip new file mode 100644 index 00000000000..d6840d98ad0 Binary files /dev/null and b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip differ diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java index 5039b4bc47a..2ec9cb92872 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java @@ -35,16 +35,26 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase { words.add( "aaa" ); words.add( "bbb" ); - String input = "aaa BBB ccc ddd EEE"; + String input = "xxx yyy aaa zzz BBB ccc ddd EEE"; // Test Stopwords TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); - stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT, words, true)); - assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }); + stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true)); + assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 }); // Now force case stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); - stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT,words, false)); - assertTokenStreamContents(stream, new String[] { "aaa" }); + stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false)); + assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 
3 }); + + // Test Stopwords + stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); + stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true)); + assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 }); + + // Now force case + stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); + stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false)); + assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 }); } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java index f12e7c488c8..4637ee1210b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java @@ -2,6 +2,7 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.IOException; import java.io.StringReader; +import java.util.Arrays; import java.util.HashSet; import java.util.Locale; import java.util.Set; @@ -57,6 +58,19 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase { "The quIck browN LuceneFox Jumps")), set2)), output); } + // LUCENE-2901 + public void testComposition() throws Exception { + TokenStream ts = new LowerCaseFilterMock( + new KeywordMarkerFilter( + new KeywordMarkerFilter( + new WhitespaceTokenizer(TEST_VERSION_CURRENT, + new StringReader("Dogs Trees Birds Houses")), + new HashSet(Arrays.asList(new String[] { "Birds", "Houses" }))), + new HashSet(Arrays.asList(new String[] { "Dogs", "Trees" })))); + + assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" }); + } + public static final class LowerCaseFilterMock extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java index de8b7311d19..070164c0161 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java @@ -24,19 +24,24 @@ import java.io.StringReader; public class TestLengthFilter extends BaseTokenStreamTestCase { - public void testFilter() throws Exception { + public void testFilterNoPosIncr() throws Exception { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("short toolong evenmuchlongertext a ab toolong foo")); - LengthFilter filter = new LengthFilter(stream, 2, 6); - CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); + LengthFilter filter = new LengthFilter(false, stream, 2, 6); + assertTokenStreamContents(filter, + new String[]{"short", "ab", "foo"}, + new int[]{1, 1, 1} + ); + } - assertTrue(filter.incrementToken()); - assertEquals("short", termAtt.toString()); - assertTrue(filter.incrementToken()); - assertEquals("ab", termAtt.toString()); - assertTrue(filter.incrementToken()); - assertEquals("foo", termAtt.toString()); - assertFalse(filter.incrementToken()); + public void testFilterWithPosIncr() throws Exception { + TokenStream stream = new 
WhitespaceTokenizer(TEST_VERSION_CURRENT, + new StringReader("short toolong evenmuchlongertext a ab toolong foo")); + LengthFilter filter = new LengthFilter(true, stream, 2, 6); + assertTokenStreamContents(filter, + new String[]{"short", "ab", "foo"}, + new int[]{1, 4, 2} + ); } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java index 9a9ac0e8dda..3f6c3ead770 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java @@ -22,8 +22,16 @@ import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase { @@ -39,4 +47,26 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase { assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); } + public void testLimitTokenCountIndexWriter() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000))); + + Document doc = new Document(); + StringBuilder b = new StringBuilder(); + for(int i=0;i<10000;i++) + b.append(" a"); + b.append(" x"); + doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + Term t = new Term("field", "x"); + assertEquals(1, reader.docFreq(t)); + reader.close(); + dir.close(); + } + } \ No newline at end of file diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java index 946f9787c4c..9f3a28ad638 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java @@ -47,6 +47,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase { CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + @Override public boolean incrementToken() { if (toks.hasNext()) { clearAttributes(); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java 
b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java index 4e65f9b11a4..256cbacd1ca 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java @@ -87,6 +87,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase { this(tokens.toArray(new Token[tokens.size()])); } + @Override public boolean incrementToken() throws IOException { if (index >= tokens.length) return false; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java index c784130d439..3d081184e58 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java @@ -213,6 +213,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { /* analyzer that uses whitespace + wdf */ Analyzer a = new Analyzer() { + @Override public TokenStream tokenStream(String field, Reader reader) { return new WordDelimiterFilter( new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), @@ -239,6 +240,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { /* analyzer that will consume tokens with large position increments */ Analyzer a2 = new Analyzer() { + @Override public TokenStream tokenStream(String field, Reader reader) { return new WordDelimiterFilter( new LargePosIncTokenFilter( @@ -271,6 +273,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { new int[] { 1, 11, 1 }); Analyzer a3 = new Analyzer() { + @Override public TokenStream tokenStream(String field, Reader reader) { StopFilter filter = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), StandardAnalyzer.STOP_WORDS_SET); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java new file mode 100644 index 00000000000..cb0adc9e474 --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java @@ -0,0 +1,130 @@ +package org.apache.lucene.analysis.path; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
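
The KeepWordFilter and LengthFilter tests reworked above exercise a new leading boolean (enable position increments): when it is true, the increment of every dropped token is folded into the next surviving token, so phrase and span queries still see the gaps, which is where the expected increments {3, 2} and {1, 4, 2} come from; when it is false, every surviving token reports an increment of 1. A standalone sketch of that bookkeeping, assuming whitespace tokens that each arrive with an increment of 1 (the class name is invented for the example):

public class PositionIncrementDemo {
  public static void main(String[] args) {
    String[] tokens = "short toolong evenmuchlongertext a ab toolong foo".split(" ");
    boolean enablePositionIncrements = true;
    int pendingSkipped = 0;
    for (String tok : tokens) {
      boolean keep = tok.length() >= 2 && tok.length() <= 6;   // the LengthFilter(2, 6) condition
      if (!keep) {
        pendingSkipped++;                 // remember the hole this dropped token leaves behind
        continue;
      }
      int posInc = enablePositionIncrements ? 1 + pendingSkipped : 1;
      pendingSkipped = 0;
      System.out.println(tok + " posInc=" + posInc);
    }
    // Prints "short posInc=1", "ab posInc=4", "foo posInc=2", the {1, 4, 2} expected by
    // testFilterWithPosIncr above; with enablePositionIncrements=false every kept token
    // would report posInc=1, as in testFilterNoPosIncr.
  }
}
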
+ */ + +import java.io.StringReader; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.CharStream; +import org.apache.lucene.analysis.charfilter.MappingCharFilter; +import org.apache.lucene.analysis.charfilter.NormalizeCharMap; + +public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { + + public void testBasic() throws Exception { + String path = "/a/b/c"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"/a", "/a/b", "/a/b/c"}, + new int[]{0, 0, 0}, + new int[]{2, 4, 6}, + new int[]{1, 0, 0}, + path.length()); + } + + public void testEndOfDelimiter() throws Exception { + String path = "/a/b/c/"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"/a", "/a/b", "/a/b/c", "/a/b/c/"}, + new int[]{0, 0, 0, 0}, + new int[]{2, 4, 6, 7}, + new int[]{1, 0, 0, 0}, + path.length()); + } + + public void testStartOfChar() throws Exception { + String path = "a/b/c"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"a", "a/b", "a/b/c"}, + new int[]{0, 0, 0}, + new int[]{1, 3, 5}, + new int[]{1, 0, 0}, + path.length()); + } + + public void testStartOfCharEndOfDelimiter() throws Exception { + String path = "a/b/c/"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"a", "a/b", "a/b/c", "a/b/c/"}, + new int[]{0, 0, 0, 0}, + new int[]{1, 3, 5, 6}, + new int[]{1, 0, 0, 0}, + path.length()); + } + + public void testOnlyDelimiter() throws Exception { + String path = "/"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"/"}, + new int[]{0}, + new int[]{1}, + new int[]{1}, + path.length()); + } + + public void testOnlyDelimiters() throws Exception { + String path = "//"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path) ); + assertTokenStreamContents(t, + new String[]{"/", "//"}, + new int[]{0, 0}, + new int[]{1, 2}, + new int[]{1, 0}, + path.length()); + } + + public void testReplace() throws Exception { + String path = "/a/b/c"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), '/', '\\' ); + assertTokenStreamContents(t, + new String[]{"\\a", "\\a\\b", "\\a\\b\\c"}, + new int[]{0, 0, 0}, + new int[]{2, 4, 6}, + new int[]{1, 0, 0}, + path.length()); + } + + public void testWindowsPath() throws Exception { + String path = "c:\\a\\b\\c"; + PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), '\\', '\\' ); + assertTokenStreamContents(t, + new String[]{"c:", "c:\\a", "c:\\a\\b", "c:\\a\\b\\c"}, + new int[]{0, 0, 0, 0}, + new int[]{2, 4, 6, 8}, + new int[]{1, 0, 0, 0}, + path.length()); + } + + public void testNormalizeWinDelimToLinuxDelim() throws Exception { + NormalizeCharMap normMap = new NormalizeCharMap(); + normMap.add("\\", "/"); + String path = "c:\\a\\b\\c"; + CharStream cs = new MappingCharFilter(normMap, new StringReader(path)); + PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs ); + assertTokenStreamContents(t, + new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"}, + new int[]{0, 0, 0, 0}, + new int[]{2, 4, 6, 8}, + new int[]{1, 0, 0, 0}, + path.length()); + } +} diff --git 
a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java new file mode 100644 index 00000000000..ee7c6eee4db --- /dev/null +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java @@ -0,0 +1,69 @@ +package org.apache.lucene.analysis.pt; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; + +/** + * Simple tests for {@link PortugueseStemFilter} + */ +public class TestPortugueseStemFilter extends BaseTokenStreamTestCase { + private Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); + return new TokenStreamComponents(source, new PortugueseStemFilter(result)); + } + }; + + /** + * Test the example from the paper "Assessing the impact of stemming accuracy + * on information retrieval" + */ + public void testExamples() throws IOException { + assertAnalyzesTo( + analyzer, + "O debate político, pelo menos o que vem a público, parece, de modo nada " + + "surpreendente, restrito a temas menores. 
Mas há, evidentemente, " + + "grandes questões em jogo nas eleições que se aproximam.", + new String[] { + "o", "debat", "politic", "pel", "menos", "o", "que", "vem", "a", + "public", "parec", "de", "mod", "nad", "surpreend", "restrit", + "a", "tem", "men", "mas", "ha", "evid", "grand", "quest", + "em", "jog", "na", "eleic", "que", "se", "aproxim" + }); + } + + /** Test against a vocabulary from the reference impl */ + public void testVocabulary() throws IOException { + assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt"); + } +} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip new file mode 100644 index 00000000000..7bcd3367327 Binary files /dev/null and b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip differ diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java index 7f7b6d64ff4..005b2e67eed 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java @@ -76,7 +76,10 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { private int search(Analyzer a, String queryString) throws IOException, ParseException { QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a); Query q = qp.parse(queryString); - return new IndexSearcher(reader).search(q, null, 1000).totalHits; + IndexSearcher searcher = newSearcher(reader); + int hits = searcher.search(q, null, 1000).totalHits; + searcher.close(); + return hits; } public void testUninitializedAnalyzer() throws Exception { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java index 2c68e047abf..29c26d6ff2b 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java @@ -395,6 +395,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase { this(tokens.toArray(new Token[tokens.size()])); } + @Override public boolean incrementToken() throws IOException { if (index >= tokens.length) return false; diff --git a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java index 11b4eb5474e..1f7b511e7ca 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java +++ b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java @@ -141,7 +141,7 @@ public abstract class CollationTestBase extends LuceneTestCase { writer.close(); IndexReader reader = IndexReader.open(farsiIndex, true); - IndexSearcher search = new IndexSearcher(reader); + IndexSearcher search = newSearcher(reader); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single diff --git a/modules/analysis/icu/build.xml b/modules/analysis/icu/build.xml index 3af4aaa66f8..2b443ce464b 100644 --- 
a/modules/analysis/icu/build.xml +++ b/modules/analysis/icu/build.xml @@ -49,6 +49,7 @@ + @@ -107,6 +108,23 @@ are part of the ICU4C package. See http://site.icu-project.org/ + + + + + + + + + + + - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-analyzers-icu - - Lucene ICUCollationKeyFilter/Analyzer - - @version@ - - Provides integration with ICU (International Components for Unicode) for - stronger Unicode and internationalization support. - - jar - - - com.ibm.icu - icu4j - ${icu-version} - - - diff --git a/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java b/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java index 7e33ee7875f..3a54af94b58 100644 --- a/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java +++ b/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.tokenattributes; import java.io.Serializable; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import com.ibm.icu.lang.UScript; @@ -77,7 +78,7 @@ public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribut } @Override - public String toString() { - return "script=" + getName(); + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(ScriptAttribute.class, "script", getName()); } } diff --git a/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java b/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java new file mode 100644 index 00000000000..cd7bfea6f51 --- /dev/null +++ b/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java @@ -0,0 +1,114 @@ +package org.apache.lucene.analysis.icu; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.text.DateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.Locale; +import java.util.TimeZone; + +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; +import com.ibm.icu.util.VersionInfo; + +/** creates a macro to augment jflex's unicode wordbreak support for > BMP */ +public class GenerateJFlexSupplementaryMacros { + private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]"); + private static final String NL = System.getProperty("line.separator"); + private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance + (DateFormat.FULL, DateFormat.FULL, Locale.US); + static { + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static final String APACHE_LICENSE + = "/*" + NL + + " * Copyright 2010 The Apache Software Foundation." + NL + + " *" + NL + + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL + + " * you may not use this file except in compliance with the License." + NL + + " * You may obtain a copy of the License at" + NL + + " *" + NL + + " * http://www.apache.org/licenses/LICENSE-2.0" + NL + + " *" + NL + + " * Unless required by applicable law or agreed to in writing, software" + NL + + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL + + " * See the License for the specific language governing permissions and" + NL + + " * limitations under the License." + NL + + " */" + NL + NL; + + + public static void main(String args[]) throws Exception { + outputHeader(); + outputMacro("ALetterSupp", "[:WordBreak=ALetter:]"); + outputMacro("FormatSupp", "[:WordBreak=Format:]"); + outputMacro("ExtendSupp", "[:WordBreak=Extend:]"); + outputMacro("NumericSupp", "[:WordBreak=Numeric:]"); + outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]"); + outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]"); + outputMacro("MidNumSupp", "[:WordBreak=MidNum:]"); + outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]"); + outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); + outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); + outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]"); + outputMacro("HanSupp", "[:Script=Han:]"); + outputMacro("HiraganaSupp", "[:Script=Hiragana:]"); + } + + static void outputHeader() { + System.out.print(APACHE_LICENSE); + System.out.print("// Generated using ICU4J " + VersionInfo.ICU_VERSION.toString() + " on "); + System.out.println(DATE_FORMAT.format(new Date())); + System.out.println("// by " + GenerateJFlexSupplementaryMacros.class.getName()); + System.out.print(NL + NL); + } + + // we have to carefully output the possibilities as compact utf-16 + // range expressions, or jflex will OOM! 
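
GenerateJFlexSupplementaryMacros above cannot hand JFlex raw supplementary codepoints, so, as the comment before outputMacro notes, it groups every codepoint of a property by its lead (high) surrogate and emits one trail-surrogate class per lead, keeping the generated macros compact. The patch does this with ICU's UnicodeSet; the standalone sketch below shows the same grouping idea with only the JDK (the class name and sample codepoints are invented, and the real tool additionally collapses the trail surrogates into ranges):

import java.util.Map;
import java.util.TreeMap;

public class LeadSurrogateGrouping {
  public static void main(String[] args) {
    // A few supplementary codepoints (all outside the BMP), standing in for a property's set.
    int[] codepoints = { 0x10400, 0x10401, 0x10428, 0x20000, 0x20001, 0x2A700 };
    // Group by lead surrogate; the value collects the trail surrogates seen for that lead.
    Map<Character, StringBuilder> byLead = new TreeMap<Character, StringBuilder>();
    for (int cp : codepoints) {
      char[] utf16 = Character.toChars(cp);          // utf16[0] = lead surrogate, utf16[1] = trail
      StringBuilder trails = byLead.get(utf16[0]);
      if (trails == null) {
        trails = new StringBuilder();
        byLead.put(utf16[0], trails);
      }
      trails.append(String.format("\\u%04x", (int) utf16[1]));
    }
    // One line per lead surrogate: ([\uLEAD][TRAIL TRAIL ...])
    for (Map.Entry<Character, StringBuilder> e : byLead.entrySet()) {
      System.out.printf("([\\u%04x][%s])%n", (int) e.getKey().charValue(), e.getValue());
    }
  }
}
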
+ static void outputMacro(String name, String pattern) { + UnicodeSet set = new UnicodeSet(pattern); + set.removeAll(BMP); + System.out.println(name + " = ("); + // if the set is empty, we have to do this or jflex will barf + if (set.isEmpty()) { + System.out.println("\t []"); + } + + HashMap utf16ByLead = new HashMap(); + for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { + char utf16[] = Character.toChars(it.codepoint); + UnicodeSet trails = utf16ByLead.get(utf16[0]); + if (trails == null) { + trails = new UnicodeSet(); + utf16ByLead.put(utf16[0], trails); + } + trails.add(utf16[1]); + } + + boolean isFirst = true; + for (Character c : utf16ByLead.keySet()) { + UnicodeSet trail = utf16ByLead.get(c); + System.out.print( isFirst ? "\t " : "\t| "); + isFirst = false; + System.out.println("([\\u" + Integer.toHexString(c) + "]" + trail.getRegexEquivalent() + ")"); + } + System.out.println(")"); + } +} diff --git a/modules/analysis/phonetic/build.xml b/modules/analysis/phonetic/build.xml index 9efd18a94b8..e8625d49221 100644 --- a/modules/analysis/phonetic/build.xml +++ b/modules/analysis/phonetic/build.xml @@ -48,6 +48,7 @@ + diff --git a/modules/analysis/phonetic/pom.xml.template b/modules/analysis/phonetic/pom.xml.template deleted file mode 100644 index 5d3b27ce71c..00000000000 --- a/modules/analysis/phonetic/pom.xml.template +++ /dev/null @@ -1,46 +0,0 @@ - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-analyzers-phonetic - - Lucene Phonetic Filters - - @version@ - - Provides phonetic encoding via Commons Codec. - - jar - - - commons-codec - commons-codec - 1.4 - - - diff --git a/modules/analysis/smartcn/build.xml b/modules/analysis/smartcn/build.xml index a793dcd9b44..075f8f497e6 100644 --- a/modules/analysis/smartcn/build.xml +++ b/modules/analysis/smartcn/build.xml @@ -25,7 +25,6 @@ - @@ -40,6 +39,7 @@ + diff --git a/modules/analysis/smartcn/pom.xml.template b/modules/analysis/smartcn/pom.xml.template deleted file mode 100644 index bf98c18002d..00000000000 --- a/modules/analysis/smartcn/pom.xml.template +++ /dev/null @@ -1,35 +0,0 @@ - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-analyzers-smartcn - Lucene Smart Chinese Analyzer - @version@ - Smart Chinese Analyzer - jar - diff --git a/modules/analysis/stempel/build.xml b/modules/analysis/stempel/build.xml index 513979117a9..517591f2727 100644 --- a/modules/analysis/stempel/build.xml +++ b/modules/analysis/stempel/build.xml @@ -25,7 +25,6 @@ - @@ -39,6 +38,7 @@ + diff --git a/modules/analysis/stempel/pom.xml.template b/modules/analysis/stempel/pom.xml.template deleted file mode 100644 index 3cf3ca6c9f5..00000000000 --- a/modules/analysis/stempel/pom.xml.template +++ /dev/null @@ -1,35 +0,0 @@ - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-analyzers-stempel - Lucene Stempel Analyzer - @version@ - Stempel Analyzer - jar - diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt index 58f0f708a3c..8f5f082e7dd 100644 --- a/modules/benchmark/CHANGES.txt +++ b/modules/benchmark/CHANGES.txt @@ -2,6 +2,21 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +02/05/2011 + LUCENE-1540: Improvements to contrib.benchmark for TREC collections. + ContentSource can now process plain text files, gzip files, and bzip2 files. 
+ TREC doc parsing now handles the TREC gov2 collection and TREC disks 4&5-CR + collection (both used by many TREC tasks). (Shai Erera, Doron Cohen) + +01/26/2011 + LUCENE-929: ExtractReuters first extracts to a tmp dir and then renames. That + way, if a previous extract attempt failed, "ant extract-reuters" will still + extract the files. (Shai Erera, Doron Cohen, Grant Ingersoll) + +01/24/2011 + LUCENE-2885: Add WaitForMerges task (calls IndexWriter.waitForMerges()). + (Mike McCandless) + 10/10/2010 The locally built patched version of the Xerces-J jar introduced as part of LUCENE-1591 is no longer required, because Xerces diff --git a/modules/benchmark/build.xml b/modules/benchmark/build.xml index 3bccf5647e1..10d1510fd1d 100644 --- a/modules/benchmark/build.xml +++ b/modules/benchmark/build.xml @@ -7,7 +7,7 @@ - + @@ -87,7 +87,6 @@ - @@ -260,5 +259,6 @@ - + + diff --git a/modules/benchmark/conf/createLineFile.alg b/modules/benchmark/conf/createLineFile.alg index 969f30762df..cad01d991cf 100644 --- a/modules/benchmark/conf/createLineFile.alg +++ b/modules/benchmark/conf/createLineFile.alg @@ -29,10 +29,14 @@ # # Where to get documents from: -content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource +content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource # Where to write the line file output: -line.file.out=work/reuters.lines.txt +line.file.out=/x/tmp/enwiki.out.txt + +docs.file=/x/lucene/data/enwiki/enwiki-20110115-pages-articles.xml + +keep.image.only.docs = false # Stop after processing the document feed once: content.source.forever=false diff --git a/modules/benchmark/lib/xercesImpl-2.10.0.jar b/modules/benchmark/lib/xercesImpl-2.10.0.jar deleted file mode 100644 index 11b416c0503..00000000000 --- a/modules/benchmark/lib/xercesImpl-2.10.0.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[9dcd8c38196b24e51f78d8e1b0a42d1ffef60acb] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar b/modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar new file mode 100644 index 00000000000..6eacbf558b1 --- /dev/null +++ b/modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar @@ -0,0 +1,2 @@ +AnyObjectId[bbb5aa7ad5bcea61c5c66ceb2ba340431cc7262d] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/modules/benchmark/lib/xml-apis-2.10.0.jar b/modules/benchmark/lib/xml-apis-2.10.0.jar deleted file mode 100644 index c59f0f17531..00000000000 --- a/modules/benchmark/lib/xml-apis-2.10.0.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[46733464fc746776c331ecc51061f3a05e662fd1] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/modules/benchmark/lib/xml-apis-2.9.0.jar b/modules/benchmark/lib/xml-apis-2.9.0.jar new file mode 100644 index 00000000000..214dd3e0819 --- /dev/null +++ b/modules/benchmark/lib/xml-apis-2.9.0.jar @@ -0,0 +1,2 @@ +AnyObjectId[d42c0ea6cfd17ed6b444b8337febbc0bdb55ed83] was removed in git history. +Apache SVN contains full history. 
\ No newline at end of file diff --git a/modules/benchmark/pom.xml.template b/modules/benchmark/pom.xml.template deleted file mode 100644 index 9e6a1ecd7e0..00000000000 --- a/modules/benchmark/pom.xml.template +++ /dev/null @@ -1,67 +0,0 @@ - - - - 4.0.0 - - org.apache.lucene - lucene-contrib - @version@ - - org.apache.lucene - lucene-benchmark - Lucene Benchmark - @version@ - Lucene Benchmarking Contributions - jar - - - org.apache.lucene - lucene-demos - @version@ - - - org.apache.lucene - lucene-highlighter - @version@ - - - commons-beanutils - commons-beanutils - ${commons-beanutils-version} - - - commons-collections - commons-collections - ${commons-collections-version} - - - commons-digester - commons-digester - ${commons-digester-version} - - - commons-logging - commons-logging - ${commons-logging-version} - - - diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java index 817e57d1c03..b831e69adab 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java @@ -56,11 +56,14 @@ import org.apache.lucene.benchmark.byTask.utils.Config; public abstract class ContentSource { private static final int BZIP = 0; - private static final int OTHER = 1; + private static final int GZIP = 1; + private static final int OTHER = 2; private static final Map extensionToType = new HashMap(); static { extensionToType.put(".bz2", Integer.valueOf(BZIP)); extensionToType.put(".bzip", Integer.valueOf(BZIP)); + extensionToType.put(".gz", Integer.valueOf(GZIP)); + extensionToType.put(".gzip", Integer.valueOf(GZIP)); } protected static final int BUFFER_SIZE = 1 << 16; // 64K @@ -78,11 +81,13 @@ public abstract class ContentSource { private CompressorStreamFactory csFactory = new CompressorStreamFactory(); + /** update count of bytes generated by this source */ protected final synchronized void addBytes(long numBytes) { bytesCount += numBytes; totalBytesCount += numBytes; } + /** update count of documents generated by this source */ protected final synchronized void addDoc() { ++docsCount; ++totalDocsCount; @@ -130,21 +135,25 @@ public abstract class ContentSource { type = typeInt.intValue(); } } - switch (type) { - case BZIP: - try { + + try { + switch (type) { + case BZIP: // According to BZip2CompressorInputStream's code, it reads the first // two file header chars ('B' and 'Z'). It is important to wrap the // underlying input stream with a buffered one since // Bzip2CompressorInputStream uses the read() method exclusively. 
is = csFactory.createCompressorInputStream("bzip2", is); - } catch (CompressorException e) { - IOException ioe = new IOException(e.getMessage()); - ioe.initCause(e); - throw ioe; - } - break; - default: // Do nothing, stay with FileInputStream + break; + case GZIP: + is = csFactory.createCompressorInputStream("gz", is); + break; + default: // Do nothing, stay with FileInputStream + } + } catch (CompressorException e) { + IOException ioe = new IOException(e.getMessage()); + ioe.initCause(e); + throw ioe; } return is; diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java index d57777a0036..873c658a338 100755 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java @@ -29,11 +29,14 @@ import java.util.Properties; */ public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser { - public DocData parse(DocData docData, String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException { + public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException { org.apache.lucene.demo.html.HTMLParser p = new org.apache.lucene.demo.html.HTMLParser(reader); // title - String title = p.getTitle(); + if (title==null) { + title = p.getTitle(); + } + // properties Properties props = p.getMetaTags(); // body diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java index 6c8b9fa4a87..47eed373e5f 100755 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java @@ -29,16 +29,18 @@ public interface HTMLParser { /** * Parse the input Reader and return DocData. - * A provided name or date is used for the result, otherwise an attempt is - * made to set them from the parsed data. - * @param dateFormat date formatter to use for extracting the date. - * @param name name of the result doc data. If null, attempt to set by parsed data. + * The provided name,title,date are used for the result, unless when they're null, + * in which case an attempt is made to set them from the parsed data. + * @param docData result reused + * @param name name of the result doc data. * @param date date of the result doc data. If null, attempt to set by parsed data. - * @param reader of html text to parse. + * @param title title of the result doc data. If null, attempt to set by parsed data. + * @param reader reader of html text to parse. + * @param dateFormat date formatter to use for extracting the date. * @return Parsed doc data. 
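
The ContentSource change above routes both bzip2 and the newly supported gzip files through Commons Compress's CompressorStreamFactory and rethrows CompressorException as IOException, picking the codec from the file extension. A minimal standalone sketch of that open-by-extension pattern (class and method names are invented; the only library calls assumed are the ones the patch itself uses):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;

public class OpenByExtension {
  /** Opens a file, transparently decompressing .gz/.gzip and .bz2/.bzip input. */
  static InputStream open(String path) throws IOException {
    // Buffering matters: the bzip2 stream reads its header with single read() calls.
    InputStream in = new BufferedInputStream(new FileInputStream(path), 1 << 16);
    try {
      if (path.endsWith(".bz2") || path.endsWith(".bzip")) {
        return new CompressorStreamFactory().createCompressorInputStream("bzip2", in);
      } else if (path.endsWith(".gz") || path.endsWith(".gzip")) {
        return new CompressorStreamFactory().createCompressorInputStream("gz", in);
      }
      return in;                         // plain text: hand back the buffered stream unchanged
    } catch (CompressorException e) {
      IOException ioe = new IOException(e.getMessage());
      ioe.initCause(e);                  // same exception translation as the patch
      throw ioe;
    }
  }
}
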
* @throws IOException * @throws InterruptedException */ - public DocData parse(DocData docData, String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException; + public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException; } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java index 6abe9fcccd9..fdee2882518 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.lucene.benchmark.byTask.feeds; import org.apache.lucene.analysis.Analyzer; diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java index 1101e661c91..d60a12ccf90 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java @@ -19,8 +19,8 @@ package org.apache.lucene.benchmark.byTask.feeds; import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.text.DateFormat; @@ -29,8 +29,8 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Locale; -import java.util.zip.GZIPInputStream; +import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader; import org.apache.lucene.util.ThreadInterruptedException; @@ -46,8 +46,10 @@ import org.apache.lucene.util.ThreadInterruptedException; *

<li>docs.dir - specifies the directory where the TREC files reside.
 * Can be set to a relative path if "work.dir" is also specified
 * (default=trec).
+ * <li>trec.doc.parser - specifies the {@link TrecDocParser} class to use for
+ * parsing the TREC documents content (default=TrecGov2Parser).
 * <li>html.parser - specifies the {@link HTMLParser} class to use for
- * parsing the TREC documents content (default=DemoHTMLParser).
+ * parsing the HTML parts of the TREC documents content (default=DemoHTMLParser).
 * <li>content.source.encoding - if not specified, ISO-8859-1 is used.
 *
  • content.source.excludeIteration - if true, do not append iteration number to docname * @@ -59,22 +61,24 @@ public class TrecContentSource extends ContentSource { ParsePosition pos; } - private static final String DATE = "Date: "; - private static final String DOCHDR = ""; - private static final String TERMINATING_DOCHDR = ""; - private static final String DOCNO = ""; - private static final String TERMINATING_DOCNO = ""; - private static final String DOC = ""; - private static final String TERMINATING_DOC = ""; + public static final String DOCNO = ""; + public static final String TERMINATING_DOCNO = ""; + public static final String DOC = ""; + public static final String TERMINATING_DOC = ""; - private static final String NEW_LINE = System.getProperty("line.separator"); + /** separator between lines in the byffer */ + public static final String NEW_LINE = System.getProperty("line.separator"); private static final String DATE_FORMATS [] = { - "EEE, dd MMM yyyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT - "EEE MMM dd kk:mm:ss yyyy z", // Tue Dec 09 16:45:08 2003 EST - "EEE, dd-MMM-':'y kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT - "EEE, dd-MMM-yyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT - "EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003 + "EEE, dd MMM yyyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT + "EEE MMM dd kk:mm:ss yyyy z", // Tue Dec 09 16:45:08 2003 EST + "EEE, dd-MMM-':'y kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT + "EEE, dd-MMM-yyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT + "EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003 + "dd MMM yyyy", // 1 March 1994 + "MMM dd, yyyy", // February 3, 1994 + "yyMMdd", // 910513 + "hhmm z.z.z. MMM dd, yyyy", // 0901 u.t.c. April 28, 1994 }; private ThreadLocal dateFormats = new ThreadLocal(); @@ -83,7 +87,7 @@ public class TrecContentSource extends ContentSource { private File dataDir = null; private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; - private int rawDocSize; + private int rawDocSize = 0; // Use to synchronize threads on reading from the TREC documents. private Object lock = new Object(); @@ -92,7 +96,10 @@ public class TrecContentSource extends ContentSource { BufferedReader reader; int iteration = 0; HTMLParser htmlParser; + private boolean excludeDocnameIteration; + private TrecDocParser trecDocParser = new TrecGov2Parser(); // default + ParsePathType currPathType; // not private for tests private DateFormatInfo getDateFormatInfo() { DateFormatInfo dfi = dateFormats.get(); @@ -118,7 +125,7 @@ public class TrecContentSource extends ContentSource { return sb; } - private Reader getTrecDocReader(StringBuilder docBuffer) { + Reader getTrecDocReader(StringBuilder docBuffer) { StringBuilderReader r = trecDocReader.get(); if (r == null) { r = new StringBuilderReader(docBuffer); @@ -129,10 +136,21 @@ public class TrecContentSource extends ContentSource { return r; } - // read until finding a line that starts with the specified prefix, or a terminating tag has been found. - private void read(StringBuilder buf, String prefix, boolean collectMatchLine, - boolean collectAll, String terminatingTag) - throws IOException, NoMoreDataException { + HTMLParser getHtmlParser() { + return htmlParser; + } + + /** + * Read until a line starting with the specified lineStart. + * @param buf buffer for collecting the data if so specified/ + * @param lineStart line start to look for, must not be null. + * @param collectMatchLine whether to collect the matching line into buffer. 
+ * @param collectAll whether to collect all lines into buffer. + * @throws IOException + * @throws NoMoreDataException + */ + private void read(StringBuilder buf, String lineStart, + boolean collectMatchLine, boolean collectAll) throws IOException, NoMoreDataException { String sep = ""; while (true) { String line = reader.readLine(); @@ -144,20 +162,12 @@ public class TrecContentSource extends ContentSource { rawDocSize += line.length(); - if (line.startsWith(prefix)) { + if (lineStart!=null && line.startsWith(lineStart)) { if (collectMatchLine) { buf.append(sep).append(line); sep = NEW_LINE; } - break; - } - - if (terminatingTag != null && line.startsWith(terminatingTag)) { - // didn't find the prefix that was asked, but the terminating - // tag was found. set the length to 0 to signal no match was - // found. - buf.setLength(0); - break; + return; } if (collectAll) { @@ -169,7 +179,7 @@ public class TrecContentSource extends ContentSource { void openNextFile() throws NoMoreDataException, IOException { close(); - int retries = 0; + currPathType = null; while (true) { if (nextFile >= inputFiles.size()) { // exhausted files, start a new round, unless forever set to false. @@ -184,13 +194,13 @@ public class TrecContentSource extends ContentSource { System.out.println("opening: " + f + " length: " + f.length()); } try { - GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), BUFFER_SIZE); - reader = new BufferedReader(new InputStreamReader(zis, encoding), BUFFER_SIZE); + InputStream inputStream = getInputStream(f); // support either gzip, bzip2, or regular text file, by extension + reader = new BufferedReader(new InputStreamReader(inputStream, encoding), BUFFER_SIZE); + currPathType = TrecDocParser.pathType(f); return; } catch (Exception e) { - retries++; - if (retries < 20 && verbose) { - System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + " #retries=" + retries); + if (verbose) { + System.out.println("Skipping 'bad' file " + f.getAbsolutePath()+" due to "+e.getMessage()); continue; } throw new NoMoreDataException(); @@ -198,7 +208,7 @@ public class TrecContentSource extends ContentSource { } } - Date parseDate(String dateStr) { + public Date parseDate(String dateStr) { dateStr = dateStr.trim(); DateFormatInfo dfi = getDateFormatInfo(); for (int i = 0; i < dfi.dfs.length; i++) { @@ -237,70 +247,47 @@ public class TrecContentSource extends ContentSource { @Override public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { - String dateStr = null, name = null; - Reader r = null; + String name = null; + StringBuilder docBuf = getDocBuffer(); + ParsePathType parsedPathType; + // protect reading from the TREC files by multiple threads. The rest of the - // method, i.e., parsing the content and returning the DocData can run - // unprotected. + // method, i.e., parsing the content and returning the DocData can run unprotected. synchronized (lock) { if (reader == null) { openNextFile(); } - - StringBuilder docBuf = getDocBuffer(); - // 1. skip until doc start + // 1. skip until doc start - required for all TREC formats docBuf.setLength(0); - read(docBuf, DOC, false, false, null); - - // 2. name + read(docBuf, DOC, false, false); + + // save parsedFile for passing trecDataParser after the sync block, in + // case another thread will open another file in between. + parsedPathType = currPathType; + + // 2. 
name - required for all TREC formats docBuf.setLength(0); - read(docBuf, DOCNO, true, false, null); + read(docBuf, DOCNO, true, false); name = docBuf.substring(DOCNO.length(), docBuf.indexOf(TERMINATING_DOCNO, - DOCNO.length())); - if (!excludeDocnameIteration) + DOCNO.length())).trim(); + + if (!excludeDocnameIteration) { name = name + "_" + iteration; - - // 3. skip until doc header - docBuf.setLength(0); - read(docBuf, DOCHDR, false, false, null); - - boolean findTerminatingDocHdr = false; - - // 4. date - look for the date only until /DOCHDR - docBuf.setLength(0); - read(docBuf, DATE, true, false, TERMINATING_DOCHDR); - if (docBuf.length() != 0) { - // Date found. - dateStr = docBuf.substring(DATE.length()); - findTerminatingDocHdr = true; } - // 5. skip until end of doc header - if (findTerminatingDocHdr) { - docBuf.setLength(0); - read(docBuf, TERMINATING_DOCHDR, false, false, null); - } - - // 6. collect until end of doc + // 3. read all until end of doc docBuf.setLength(0); - read(docBuf, TERMINATING_DOC, false, true, null); - - // 7. Set up a Reader over the read content - r = getTrecDocReader(docBuf); - // Resetting the thread's reader means it will reuse the instance - // allocated as well as re-read from docBuf. - r.reset(); - - // count char length of parsed html text (larger than the plain doc body text). - addBytes(docBuf.length()); + read(docBuf, TERMINATING_DOC, false, true); } + + // count char length of text to be parsed (may be larger than the resulted plain doc body text). + addBytes(docBuf.length()); // This code segment relies on HtmlParser being thread safe. When we get // here, everything else is already private to that thread, so we're safe. - Date date = dateStr != null ? parseDate(dateStr) : null; try { - docData = htmlParser.parse(docData, name, date, r, null); + docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType); addDoc(); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); @@ -322,27 +309,40 @@ public class TrecContentSource extends ContentSource { @Override public void setConfig(Config config) { super.setConfig(config); + // dirs File workDir = new File(config.get("work.dir", "work")); String d = config.get("docs.dir", "trec"); dataDir = new File(d); if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } + // files collectFiles(dataDir, inputFiles); if (inputFiles.size() == 0) { throw new IllegalArgumentException("No files in dataDir: " + dataDir); } + // trec doc parser try { - String parserClassName = config.get("html.parser", - "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); - htmlParser = Class.forName(parserClassName).asSubclass(HTMLParser.class).newInstance(); + String trecDocParserClassName = config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser"); + trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class).newInstance(); } catch (Exception e) { // Should not get here. Throw runtime exception. throw new RuntimeException(e); } + // html parser + try { + String htmlParserClassName = config.get("html.parser", + "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"); + htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance(); + } catch (Exception e) { + // Should not get here. Throw runtime exception. 
+ throw new RuntimeException(e); + } + // encoding if (encoding == null) { encoding = "ISO-8859-1"; } + // iteration exclusion in doc name excludeDocnameIteration = config.get("content.source.excludeIteration", false); } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java new file mode 100644 index 00000000000..216cdebd7c7 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java @@ -0,0 +1,136 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * Parser for trec doc content, invoked on doc text excluding and + * which are handled in TrecContentSource. Required to be stateless and hence thread safe. + */ +public abstract class TrecDocParser { + + /** Types of trec parse paths, */ + public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES } + + /** trec parser type used for unknown extensions */ + public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2; + + static final Map pathType2parser = new HashMap(); + static { + pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser()); + pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser()); + pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser()); + pathType2parser.put(ParsePathType.FT, new TrecFTParser()); + pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser()); + } + + static final Map pathName2Type = new HashMap(); + static { + for (ParsePathType ppt : ParsePathType.values()) { + pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt); + } + } + + /** max length of walk up from file to its ancestors when looking for a known path type */ + private static final int MAX_PATH_LENGTH = 10; + + /** + * Compute the path type of a file by inspecting name of file and its parents + */ + public static ParsePathType pathType(File f) { + int pathLength = 0; + while (f != null && ++pathLength < MAX_PATH_LENGTH) { + ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH)); + if (ppt!=null) { + return ppt; + } + f = f.getParentFile(); + } + return DEFAULT_PATH_TYPE; + } + + /** + * parse the text prepared in docBuf into a result DocData, + * no synchronization is required. 
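A concrete parser typically follows one pattern: find the interesting regions with extract(), clean them with stripTags(), and fill the reused DocData. The sketch below uses invented DATE/TITLE tag names and an invented class name purely for illustration; the real FBIS, FR94, FT, LATimes and GOV2 parsers appear further down in this patch.

    package org.apache.lucene.benchmark.byTask.feeds;

    import java.io.IOException;
    import java.util.Date;

    /** Illustrative only: a parser for a made-up TREC-like format with DATE/TITLE tags. */
    public class TrecExampleParser extends TrecDocParser {

      private static final String DATE = "<DATE>";
      private static final String DATE_END = "</DATE>";
      private static final String TITLE = "<TITLE>";
      private static final String TITLE_END = "</TITLE>";

      @Override
      public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
          StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
        // date: extract the tagged region and let the content source parse it
        Date date = null;
        String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
        if (dateStr != null) {
          date = trecSrc.parseDate(dateStr.trim());
        }
        // title: just the tagged region, if present
        String title = extract(docBuf, TITLE, TITLE_END, -1, null);

        docData.clear();
        docData.setName(name);
        docData.setDate(date);
        docData.setTitle(title);
        docData.setBody(stripTags(docBuf, 0)); // body: whole buffer with markup blanked out
        return docData;
      }
    }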
+ * @param docData reusable result + * @param name name that should be set to the result + * @param trecSrc calling trec content source + * @param docBuf text to parse + * @param pathType type of parsed file, or null if unknown - may be used by + * parsers to alter their behavior according to the file path type. + */ + public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException; + + /** + * strip tags from buf: each tag is replaced by a single blank. + * @return text obtained when stripping all tags from buf (Input StringBuilder is unmodified). + */ + public static String stripTags(StringBuilder buf, int start) { + return stripTags(buf.substring(start),0); + } + + /** + * strip tags from input. + * @see #stripTags(StringBuilder, int) + */ + public static String stripTags(String buf, int start) { + if (start>0) { + buf = buf.substring(0); + } + return buf.replaceAll("<[^>]*>", " "); + } + + /** + * Extract from buf the text of interest within specified tags + * @param buf entire input text + * @param startTag tag marking start of text of interest + * @param endTag tag marking end of text of interest + * @param maxPos if ≥ 0 sets a limit on start of text of interest + * @return text of interest or null if not found + */ + public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) { + int k1 = buf.indexOf(startTag); + if (k1>=0 && (maxPos<0 || k1=0 && (maxPos<0 || k2=0 && k1a2<>1?",0)); + //} + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java new file mode 100644 index 00000000000..8efcd04e91d --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java @@ -0,0 +1,65 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FBIS docs in trec disks 4+5 collection format + */ +public class TrecFBISParser extends TrecDocParser { + + private static final String HEADER = "
    "; + private static final String HEADER_END = "
    "; + private static final int HEADER_END_LENGTH = HEADER_END.length(); + + private static final String DATE1 = ""; + private static final String DATE1_END = ""; + + private static final String TI = ""; + private static final String TI_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date, title + Date date = null; + String title = null; + int h1 = docBuf.indexOf(HEADER); + if (h1>=0) { + int h2 = docBuf.indexOf(HEADER_END,h1); + mark = h2+HEADER_END_LENGTH; + // date... + String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null); + if (dateStr != null) { + date = trecSrc.parseDate(dateStr); + } + // title... + title = extract(docBuf, TI, TI_END, h2, null); + } + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java new file mode 100644 index 00000000000..ce6492120d7 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java @@ -0,0 +1,66 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FR94 docs in trec disks 4+5 collection format + */ +public class TrecFR94Parser extends TrecDocParser { + + private static final String TEXT = ""; + private static final int TEXT_LENGTH = TEXT.length(); + private static final String TEXT_END = ""; + + private static final String DATE = ""; + private static final String[] DATE_NOISE_PREFIXES = { + "DATE:", + "date:", //TODO improve date extraction for this format + "t.c.", + }; + private static final String DATE_END = ""; + + //TODO can we also extract title for this format? + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + // optionally skip some of the text, set date (no title?) + Date date = null; + int h1 = docBuf.indexOf(TEXT); + if (h1>=0) { + int h2 = docBuf.indexOf(TEXT_END,h1); + mark = h1+TEXT_LENGTH; + // date... 
+ String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES); + if (dateStr != null) { + dateStr = stripTags(dateStr,0).toString(); + date = trecSrc.parseDate(dateStr.trim()); + } + } + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java new file mode 100644 index 00000000000..ab39d9c2860 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java @@ -0,0 +1,57 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FT docs in trec disks 4+5 collection format + */ +public class TrecFTParser extends TrecDocParser { + + private static final String DATE = ""; + private static final String DATE_END = ""; + + private static final String HEADLINE = ""; + private static final String HEADLINE_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + + // date... + Date date = null; + String dateStr = extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) { + date = trecSrc.parseDate(dateStr); + } + + // title... + String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecGov2Parser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecGov2Parser.java new file mode 100755 index 00000000000..ef8371d1735 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecGov2Parser.java @@ -0,0 +1,59 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Date; + +/** + * Parser for the GOV2 collection format + */ +public class TrecGov2Parser extends TrecDocParser { + + private static final String DATE = "Date: "; + private static final String DATE_END = TrecContentSource.NEW_LINE; + + private static final String DOCHDR = ""; + private static final String TERMINATING_DOCHDR = ""; + private static final int TERMINATING_DOCHDR_LENGTH = TERMINATING_DOCHDR.length(); + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + // Set up a (per-thread) reused Reader over the read content, reset it to re-read from docBuf + Reader r = trecSrc.getTrecDocReader(docBuf); + + // skip some of the text, optionally set date + Date date = null; + int h1 = docBuf.indexOf(DOCHDR); + if (h1>=0) { + int h2 = docBuf.indexOf(TERMINATING_DOCHDR,h1); + String dateStr = extract(docBuf, DATE, DATE_END, h2, null); + if (dateStr != null) { + date = trecSrc.parseDate(dateStr); + } + r.mark(h2+TERMINATING_DOCHDR_LENGTH); + } + + r.reset(); + HTMLParser htmlParser = trecSrc.getHtmlParser(); + return htmlParser.parse(docData, name, date, null, r, null); + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java new file mode 100644 index 00000000000..367015bee36 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java @@ -0,0 +1,71 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Date; + +/** + * Parser for the FT docs in trec disks 4+5 collection format + */ +public class TrecLATimesParser extends TrecDocParser { + + private static final String DATE = ""; + private static final String DATE_END = ""; + private static final String DATE_NOISE = "day,"; // anything aftre the ',' + + private static final String SUBJECT = ""; + private static final String SUBJECT_END = ""; + private static final String HEADLINE = ""; + private static final String HEADLINE_END = ""; + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + int mark = 0; // that much is skipped + + // date... + Date date = null; + String dateStr = extract(docBuf, DATE, DATE_END, -1, null); + if (dateStr != null) { + int d2a = dateStr.indexOf(DATE_NOISE); + if (d2a > 0) { + dateStr = dateStr.substring(0,d2a+3); // we need the "day" part + } + dateStr = stripTags(dateStr,0).toString(); + date = trecSrc.parseDate(dateStr.trim()); + } + + // title... first try with SUBJECT, them with HEADLINE + String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null); + if (title==null) { + title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null); + } + if (title!=null) { + title = stripTags(title,0).toString().trim(); + } + + docData.clear(); + docData.setName(name); + docData.setDate(date); + docData.setTitle(title); + docData.setBody(stripTags(docBuf, mark).toString()); + return docData; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java new file mode 100644 index 00000000000..fc882035a01 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java @@ -0,0 +1,33 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +/** + * Parser for trec docs which selects the parser to apply according + * to the source files path, defaulting to {@link TrecGov2Parser}. 
+ */ +public class TrecParserByPath extends TrecDocParser { + + @Override + public DocData parse(DocData docData, String name, TrecContentSource trecSrc, + StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException { + return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType); + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index a347c9c5661..5a8f0ddbb9d 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -46,8 +46,7 @@ import java.io.PrintStream; * Create an index.
    * Other side effects: index writer object in perfRunData is set.
    * Relevant properties: merge.factor (default 10), - * max.buffered (default no flush), max.field.length (default - * 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0], + * max.buffered (default no flush), compound (default true), ram.flush.mb [default 0], * merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy), * merge.scheduler (default * org.apache.lucene.index.ConcurrentMergeScheduler), @@ -153,7 +152,6 @@ public class CreateIndexTask extends PerfTask { logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR)); } } - iwConf.setMaxFieldLength(config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH)); final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB); final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED); if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) { diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java index 398c72fdc2c..47ea3f428d9 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java @@ -59,7 +59,7 @@ public class NearRealtimeReaderTask extends PerfTask { } long t = System.currentTimeMillis(); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); runData.setIndexReader(r); // Transfer our reference to runData r.decRef(); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java index fe61e4442f0..d83dcd398de 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import java.io.IOException; - /** * Open an index writer. *
    Other side effects: index writer object in perfRunData is set. @@ -41,7 +40,6 @@ import java.io.IOException; public class OpenIndexTask extends PerfTask { public static final int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; - public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriterConfig.UNLIMITED_FIELD_LENGTH; public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR; public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; private String commitUserData; diff --git a/solr/src/java/org/apache/solr/search/SolrSimilarity.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WaitForMergesTask.java similarity index 63% rename from solr/src/java/org/apache/solr/search/SolrSimilarity.java rename to modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WaitForMergesTask.java index 60525376d57..6cd1c16ff13 100644 --- a/solr/src/java/org/apache/solr/search/SolrSimilarity.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WaitForMergesTask.java @@ -1,3 +1,4 @@ +package org.apache.lucene.benchmark.byTask.tasks; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -15,23 +16,25 @@ * limitations under the License. */ -package org.apache.solr.search; - -import org.apache.lucene.search.DefaultSimilarity; - import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexReader; /** + * Waits for merges to finish. */ -// don't make it public for now... easier to change later. +public class WaitForMergesTask extends PerfTask { -// This class is currently unused. -class SolrSimilarity extends DefaultSimilarity { - private final HashMap lengthNormConfig = new HashMap(); - - public float lengthNorm(String fieldName, int numTerms) { - // Float f = lengthNormConfig. - // if (lengthNormDisabled.) - return super.lengthNorm(fieldName, numTerms); + public WaitForMergesTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws Exception { + getRunData().getIndexWriter().waitForMerges(); + return 1; } } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java index c6e9510e01d..a10d5371c72 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java @@ -158,8 +158,10 @@ public class StringBuilderReader extends Reader { synchronized (lock) { this.sb = sb; length = sb.length(); + next = mark = 0; } } + @Override public long skip(long ns) throws IOException { synchronized (lock) { diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java index 79e3f0b1644..2a02aa7cfa6 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java @@ -28,7 +28,7 @@ import org.apache.lucene.search.TopDocs; /** * Create a log ready for submission. 
* Extend this class and override - * {@link #report(QualityQuery, TopDocs, String, Searcher)} + * {@link #report(QualityQuery, TopDocs, String, IndexSearcher)} * to create different reports. */ public class SubmissionReport { diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java index 3e4104b5b85..395d640fc72 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java @@ -29,146 +29,119 @@ import java.util.regex.Pattern; /** * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body */ -public class ExtractReuters -{ - private File reutersDir; - private File outputDir; - private static final String LINE_SEPARATOR = System.getProperty("line.separator"); - - public ExtractReuters(File reutersDir, File outputDir) - { - this.reutersDir = reutersDir; - this.outputDir = outputDir; - System.out.println("Deleting all files in " + outputDir); - File [] files = outputDir.listFiles(); - for (int i = 0; i < files.length; i++) - { - files[i].delete(); - } +public class ExtractReuters { + private File reutersDir; + private File outputDir; + private static final String LINE_SEPARATOR = System.getProperty("line.separator"); + public ExtractReuters(File reutersDir, File outputDir) { + this.reutersDir = reutersDir; + this.outputDir = outputDir; + System.out.println("Deleting all files in " + outputDir); + for (File f : outputDir.listFiles()) { + f.delete(); } + } - public void extract() - { - File [] sgmFiles = reutersDir.listFiles(new FileFilter() - { - public boolean accept(File file) - { - return file.getName().endsWith(".sgm"); - } - }); - if (sgmFiles != null && sgmFiles.length > 0) - { - for (int i = 0; i < sgmFiles.length; i++) - { - File sgmFile = sgmFiles[i]; - extractFile(sgmFile); + public void extract() { + File[] sgmFiles = reutersDir.listFiles(new FileFilter() { + public boolean accept(File file) { + return file.getName().endsWith(".sgm"); + } + }); + if (sgmFiles != null && sgmFiles.length > 0) { + for (File sgmFile : sgmFiles) { + extractFile(sgmFile); + } + } else { + System.err.println("No .sgm files in " + reutersDir); + } + } + + Pattern EXTRACTION_PATTERN = Pattern + .compile("(.*?)|(.*?)|(.*?)"); + + private static String[] META_CHARS = { "&", "<", ">", "\"", "'" }; + + private static String[] META_CHARS_SERIALIZATIONS = { "&", "<", + ">", """, "'" }; + + /** + * Override if you wish to change what is extracted + * + * @param sgmFile + */ + protected void extractFile(File sgmFile) { + try { + BufferedReader reader = new BufferedReader(new FileReader(sgmFile)); + + StringBuilder buffer = new StringBuilder(1024); + StringBuilder outBuffer = new StringBuilder(1024); + + String line = null; + int docNumber = 0; + while ((line = reader.readLine()) != null) { + // when we see a closing reuters tag, flush the file + + if (line.indexOf("(.*?)|(.*?)|(.*?)"); - - private static String[] META_CHARS - = {"&", "<", ">", "\"", "'"}; - - private static String[] META_CHARS_SERIALIZATIONS - = {"&", "<", ">", """, "'"}; - - /** - * Override if you wish to change what is extracted - * - * @param sgmFile - */ - protected void extractFile(File sgmFile) - { - try - { - BufferedReader reader = new BufferedReader(new FileReader(sgmFile)); - - StringBuilder buffer = new StringBuilder(1024); - StringBuilder outBuffer = new 
StringBuilder(1024); - - String line = null; - int docNumber = 0; - while ((line = reader.readLine()) != null) - { - //when we see a closing reuters tag, flush the file - - if (line.indexOf(" org.apache.lucene.benchmark.utils.ExtractReuters "); - } + private static void printUsage() { + System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters "); + } + } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 90c970f402b..1df92fc92c4 100755 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -514,6 +514,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { "{ [ AddDoc]: 4} : * ", "ResetInputs ", "{ [ AddDoc]: 4} : * ", + "WaitForMerges", "CloseIndex", }; @@ -549,6 +550,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { " ResetSystemErase", " CreateIndex", " { \"AddDocs\" AddDoc > : * ", + " WaitForMerges", " CloseIndex", "} : 2", }; diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java index a178c6a6b1f..8222e5782ff 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java @@ -18,14 +18,20 @@ package org.apache.lucene.benchmark.byTask.feeds; */ import java.io.BufferedReader; +import java.io.File; import java.io.IOException; import java.io.StringReader; import java.text.ParseException; +import java.util.Arrays; import java.util.Date; +import java.util.HashSet; +import java.util.Properties; +import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.document.DateTools; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TrecContentSourceTest extends LuceneTestCase { @@ -329,5 +335,62 @@ public class TrecContentSourceTest extends LuceneTestCase { // Don't test that NoMoreDataException is thrown, since the forever flag is // turned on. } + + /** + * Open a trec content source over a directory with files of all trec path types and all + * supported formats - bzip, gzip, txt. 
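The new test below drives all of this end-to-end against a small zip of sample files. Stripped of the assertions, wiring a TrecContentSource to the path-based parser looks roughly like this (the docs directory, class name and printed fields are illustrative; the property names are the ones used by the patch):

    import java.util.Properties;

    import org.apache.lucene.benchmark.byTask.feeds.DocData;
    import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
    import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
    import org.apache.lucene.benchmark.byTask.feeds.TrecParserByPath;
    import org.apache.lucene.benchmark.byTask.utils.Config;

    public class TrecFeedExample {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("print.props", "false");
        props.setProperty("docs.dir", "/data/trec"); // illustrative: any dir mixing GOV2/FBIS/FT/FR94/LATIMES files
        props.setProperty("trec.doc.parser", TrecParserByPath.class.getName());
        props.setProperty("content.source.forever", "false");

        TrecContentSource source = new TrecContentSource();
        source.setConfig(new Config(props));
        source.resetInputs();

        DocData dd = new DocData();
        try {
          while (true) {
            dd = source.getNextDocData(dd); // throws NoMoreDataException when exhausted
            System.out.println(dd.getName() + " | " + dd.getTitle());
          }
        } catch (NoMoreDataException e) {
          // all input files consumed
        } finally {
          source.close();
        }
      }
    }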
+ */ + public void testTrecFeedDirAllTypes() throws Exception { + File dataDir = _TestUtil.getTempDir("trecFeedAllTypes"); + _TestUtil.unzip(getDataFile("trecdocs.zip"), dataDir); + TrecContentSource tcs = new TrecContentSource(); + Properties props = new Properties(); + props.setProperty("print.props", "false"); + props.setProperty("content.source.verbose", "false"); + props.setProperty("content.source.excludeIteration", "true"); + props.setProperty("doc.maker.forever", "false"); + props.setProperty("docs.dir", dataDir.getCanonicalPath().replace('\\','/')); + props.setProperty("trec.doc.parser", TrecParserByPath.class.getName()); + props.setProperty("content.source.forever", "false"); + tcs.setConfig(new Config(props)); + tcs.resetInputs(); + DocData dd = new DocData(); + int n = 0; + boolean gotExpectedException = false; + HashSet unseenTypes = new HashSet(Arrays.asList(ParsePathType.values())); + try { + while (n<100) { // arbiterary limit to prevent looping forever in case of test failure + dd = tcs.getNextDocData(dd); + ++n; + assertNotNull("doc data "+n+" should not be null!", dd); + unseenTypes.remove(tcs.currPathType); + switch(tcs.currPathType) { + case GOV2: + assertDocData(dd, "TEST-000", "TEST-000 title", "TEST-000 text", tcs.parseDate("Sun, 11 Jan 2009 08:00:00 GMT")); + break; + case FBIS: + assertDocData(dd, "TEST-001", "TEST-001 Title", "TEST-001 text", tcs.parseDate("1 January 1991")); + break; + case FR94: + // no title extraction in this source for now + assertDocData(dd, "TEST-002", null, "DEPARTMENT OF SOMETHING", tcs.parseDate("February 3, 1994")); + break; + case FT: + assertDocData(dd, "TEST-003", "Test-003 title", "Some pub text", tcs.parseDate("980424")); + break; + case LATIMES: + assertDocData(dd, "TEST-004", "Test-004 Title", "Some paragraph", tcs.parseDate("January 17, 1997, Sunday")); + break; + default: + assertTrue("Should never get here!", false); + } + } + } catch (NoMoreDataException e) { + gotExpectedException = true; + } + assertTrue("Should have gotten NoMoreDataException!", gotExpectedException); + assertEquals("Wrong numbre of documents created by osurce!",5,n); + assertTrue("Did not see all types!",unseenTypes.isEmpty()); + } } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip new file mode 100644 index 00000000000..8fe90d3c2f7 Binary files /dev/null and b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip differ diff --git a/modules/build.xml b/modules/build.xml index 5a07003c66b..ccaa9f92a89 100644 --- a/modules/build.xml +++ b/modules/build.xml @@ -56,6 +56,7 @@ + @@ -65,6 +66,7 @@ + diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0550efb3e84..7beb1d35d61 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -26,10 +26,201 @@ Versions of Major Components --------------------- Apache Lucene trunk Apache Tika 0.8-SNAPSHOT -Carrot2 3.1.0 +Carrot2 3.4.2 Velocity 1.6.4 and Velocity Tools 2.0 +Upgrading from Solr 3.1-dev +---------------------- + +* The Lucene index format has changed and as a result, once you upgrade, + previous versions of Solr will no longer be able to read your indices. + In a master/slave configuration, all searchers/slaves should be upgraded + before the master. If the master were to be updated first, the older + searchers would not be able to read the new index format. 
+ +* Setting abortOnConfigurationError=false is no longer supported + (since it has never worked properly). Solr will now warn you if + you attempt to set this configuration option at all. (see SOLR-1846) + +* The default logic for the 'mm' param of the 'dismax' QParser has + been changed. If no 'mm' param is specified (either in the query, + or as a default in solrconfig.xml) then the effective value of the + 'q.op' param (either in the query or as a default in solrconfig.xml + or from the 'defaultOperator' option in schema.xml) is used to + influence the behavior. If q.op is effectively "AND" then mm=100%. + If q.op is effectively "OR" then mm=0%. Users who wish to force the + legacy behavior should set a default value for the 'mm' param in + their solrconfig.xml file. + +* In previous releases, sorting on fields that are "multiValued" + (either by explicit declaration in schema.xml or by implict behavior + because the "version" attribute on the schema was less then 1.2) did + not generally work, but it would sometimes silently act as if it + succeeded and order the docs arbitrarily. Solr will now fail on any + attempt to sort on a multivalued field + + + +Detailed Change List +---------------------- + +New Features +---------------------- + +* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now + supports "percentages" which get evaluated relative the current size of + the cache when warming happens. + (Tomas Fernandez Lobbe and hossman) + +* SOLR-1915: DebugComponent now supports using a NamedList to model + Explanation objects in it's responses instead of + Explanation.toString (hossman) + +* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf + norm, maxdoc, numdocs. (yonik) + +* SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing. + (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, + Koji Sekiguchi, Iv�n de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, + Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald, + Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, + Harish Agarwal, yonik) + +* SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik) + +* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful + when generating filter queries from terms returned from field faceting or + the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik) + +* SOLR-2001: The query component will substitute an empty query that matches + no documents if the query parser returns null. This also prevents an + exception from being thrown by the default parser if "q" is missing. (yonik) + +* SOLR-2112: Solrj API now supports streaming results. (ryan) + +* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting + (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan) + +* LUCENE-2507: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker + to retrieve correction candidates directly from the term dictionary using + levenshtein automata. (rmuir) + +* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper, + built-in load balancing, and infrastructure for future SolrCloud work. + (yonik, Mark Miller) + +* SOLR-1729: Evaluation of NOW for date math is done only once per request for + consistency, and is also propagated to shards in distributed search. + Adding a parameter NOW= to the request will override the + current time. 
(Peter Sturge, yonik) + +Optimizations +---------------------- + +* SOLR-1875: Per-segment field faceting for single valued string fields. + Enable with facet.method=fcs, control the number of threads used with + the "threads" local param on the facet.field param. This algorithm will + only be faster in the presence of rapid index changes. (yonik) + +* SOLR-1904: When facet.enum.cache.minDf > 0 and the base doc set is a + SortedIntSet, convert to HashDocSet for better performance. (yonik) + +* SOLR-1843: A new "rootName" attribute is now available when + configuring in solrconfig.xml. If this attribute is set, + Solr will use it as the root name for all MBeans Solr exposes via + JMX. The default root name is "solr" followed by the core name. + (Constantijn Visinescu, hossman) + +* SOLR-2092: Speed up single-valued and multi-valued "fc" faceting. Typical + improvement is 5%, but can be much greater (up to 10x faster) when facet.offset + is very large (deep paging). (yonik) + +Bug Fixes +---------------------- + +* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on + invalid config. Specificly: a signatureField that does not exist, + or overwriteDupes=true with a signatureField that is not indexed. + (hossman) + +* SOLR-1824: IndexSchema will now fail to initialize if there is a + problem initializing one of the fields or field types. (hossman) + +* SOLR-1928: TermsComponent didn't correctly break ties for non-text + fields sorted by count. (yonik) + +* SOLR-2107: MoreLikeThisHandler doesn't work with alternate qparsers. (yonik) + +* SOLR-2108: Fixed false positives when using wildcard queries on fields with reversed + wildcard support. For example, a query of *zemog* would match documents that contain + 'gomez'. (Landon Kuhn via Robert Muir) + +* SOLR-1962: SolrCore#initIndex should not use a mix of indexPath and newIndexPath (Mark Miller) + +* SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace + (Erick Erickson via hossman) + +* SOLR-2307: fix bug in PHPSerializedResponseWriter (wt=phps) when + dealing with SolrDocumentList objects -- ie: sharded queries. + (Antonio Verni via hossman) + +* SOLR-2127: Fixed serialization of default core and indentation of solr.xml when serializing. + (Ephraim Ofir, Mark Miller) + +* SOLR-482: Provide more exception handling in CSVLoader (gsingers) + +* SOLR-2320: Fixed ReplicationHandler detail reporting for masters + (hossman) + +* SOLR-2085: Improve SolrJ behavior when FacetComponent comes before + QueryComponent (Tomas Salfischberger via hossman) + +* SOLR-1940: Fix SolrDispatchFilter behavior when Content-Type is + unknown (Lance Norskog and hossman) + +* SOLR-2339: Fix sorting to explicitly generate an error if you + attempt to sort on a multiValued field. (hossman) + +Other Changes +---------------------- + +* SOLR-1846: Eliminate support for the abortOnConfigurationError + option. It has never worked very well, and in recent versions of + Solr hasn't worked at all. (hossman) + +* SOLR-1889: The default logic for the 'mm' param of DismaxQParser and + ExtendedDismaxQParser has been changed to be determined based on the + effective value of the 'q.op' param (hossman) + +* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system + (hossman) + +* SOLR-2289: Tweak spatial coords for example docs so they are a bit + more spread out (Erick Erickson via hossman) + +* SOLR-2288: Small tweaks to eliminate compiler warnings. 
primarily + using Generics where applicable in method/object declatations, and + adding @SuppressWarnings("unchecked") when appropriate (hossman) + +* SOLR-2350: Since Solr no longer requires XML files to be in UTF-8 + (see SOLR-96) SimplePostTool (aka: post.jar) has been improved to + work with files of any mime-type or charset. (hossman) + +Documentation +---------------------- + +* SOLR-2232: Improved README info on solr.solr.home in examples + (Eric Pugh and hossman) + +================== 3.1.0-dev ================== +Versions of Major Components +--------------------- +Apache Lucene trunk +Apache Tika 0.8-SNAPSHOT +Carrot2 3.4.2 + + Upgrading from Solr 1.4 ---------------------- @@ -66,25 +257,10 @@ Upgrading from Solr 1.4 * SOLR-1876: All Analyzers and TokenStreams are now final to enforce the decorator pattern. (rmuir, uschindler) -* Setting abortOnConfigurationError=false is no longer supported - (since it has never worked properly). Solr will now warn you if - you attempt to set this configuration option at all. (see SOLR-1846) - -* The default logic for the 'mm' param of the 'dismax' QParser has - been changed. If no 'mm' param is specified (either in the query, - or as a default in solrconfig.xml) then the effective value of the - 'q.op' param (either in the query or as a default in solrconfig.xml - or from the 'defaultOperator' option in schema.xml) is used to - influence the behavior. If q.op is effectively "AND" then mm=100%. - If q.op is effectively "OR" then mm=0%. Users who wish to force the - legacy behavior should set a default value for the 'mm' param in - their solrconfig.xml file. - * LUCENE-2608: Added the ability to specify the accuracy on a per request basis. - Implementations of SolrSpellChecker must change over to the new SolrSpellChecker - abstract methods using the new SpellingOptions class. While this change is not - backward compatible, it should be trivial to migrate as the SpellingOptions class - just encapsulates the parameters that were passed in to the methods before the change. (gsingers) + It is recommended that implementations of SolrSpellChecker should change over to the new SolrSpellChecker + methods using the new SpellingOptions class, but are not required to. While this change is + backward compatible, the trunk version of Solr has already dropped support for all but the SpellingOptions method. (gsingers) * readercycle script was removed. (SOLR-2046) @@ -95,10 +271,10 @@ New Features ---------------------- * SOLR-1302: Added several new distance based functions, including - Great Circle (haversine) for geo search, Manhattan, Euclidean - and String (using the StringDistance methods in the Lucene spellchecker). + Great Circle (haversine), Manhattan, Euclidean and String (using the + StringDistance methods in the Lucene spellchecker). Also added geohash(), deg() and rad() convenience functions. - See http://wiki.apache.org/solr/FunctionQuery. (yonik, gsingers) + See http://wiki.apache.org/solr/FunctionQuery. (gsingers) * SOLR-1553: New dismax parser implementation (accessible as "edismax") that supports full lucene syntax, improved reserved char escaping, @@ -130,12 +306,10 @@ New Features * SOLR-1653: Add PatternReplaceCharFilter (koji) -* SOLR-1131: FieldTypes can now output multiple Fields per Type and still be - searched. This can be handy for hiding the details of a particular - implementation such as in the spatial case. 
- (Chris Mattmann, shalin, noble, gsingers, yonik) +* SOLR-1131: FieldTypes can now output multiple Fields per Type and still be searched. This can be handy for hiding the details of a particular + implementation such as in the spatial case. (Chris Mattmann, shalin, noble, gsingers, yonik) -* SOLR-1586: Add support for Geohash FieldType (Chris Mattmann, gsingers) +* SOLR-1586: Add support for Geohash and Spatial Tile FieldType (Chris Mattmann, gsingers) * SOLR-1697: PluginInfo should load plugins w/o class attribute also (noble) @@ -167,41 +341,18 @@ New Features * SOLR-1740: ShingleFilterFactory supports the "minShingleSize" and "tokenSeparator" parameters for controlling the minimum shingle size produced by the filter, and the separator string that it uses, respectively. (Steven Rowe via rmuir) - + * SOLR-744: ShingleFilterFactory supports the "outputUnigramsIfNoShingles" parameter, to output unigrams if the number of input tokens is fewer than minShingleSize, and no shingles can be generated. (Chris Harris via Steven Rowe) - -* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now - supports "percentages" which get evaluated relative the current size of - the cache when warming happens. - (Tomas Fernandez Lobbe and hossman) - -* SOLR-397: Date Faceting now supports a "facet.date.include" param - for specifying when the upper & lower end points of computed date - ranges should be included in the range. Legal values are: "all", - "lower", "upper", "edge", and "outer". For backwards compatibility - the default value is the set: [lower,upper,edge], so that al ranges - between start and ed are inclusive of their endpoints, but the - "before" and "after" ranges are not. - -* SOLR-945: JSON update handler that accepts add, delete, commit - commands in JSON format. (Ryan McKinley, yonik) * SOLR-1923: PhoneticFilterFactory now has support for the Caverphone algorithm. (rmuir) - -* SOLR-1915: DebugComponent now supports using a NamedList to model - Explanation objects in it's responses instead of - Explanation.toString (hossman) -* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf - norm, maxdoc, numdocs. (yonik) - * SOLR-1957: The VelocityResponseWriter contrib moved to core. - Example search UI now available at http://localhost:8983/solr/browse - (ehatcher) + Example search UI now available at http://localhost:8983/solr/browse + (ehatcher) * SOLR-1974: Add LimitTokenCountFilterFactory. (koji) @@ -217,11 +368,17 @@ New Features * SOLR-1984: Add HyphenationCompoundWordTokenFilterFactory. (PB via rmuir) -* SOLR-1568: Added native filtering support via geofilt for spatial field - types LatLonType, PointType, GeohashField. - See http://wiki.apache.org/solr/SpatialSearch for examples. - (yonik, gsingers) - +* SOLR-397: Date Faceting now supports a "facet.date.include" param + for specifying when the upper & lower end points of computed date + ranges should be included in the range. Legal values are: "all", + "lower", "upper", "edge", and "outer". For backwards compatibility + the default value is the set: [lower,upper,edge], so that al ranges + between start and ed are inclusive of their endpoints, but the + "before" and "after" ranges are not. + +* SOLR-945: JSON update handler that accepts add, delete, commit + commands in JSON format. (Ryan McKinley, yonik) + * SOLR-2015: Add a boolean attribute autoGeneratePhraseQueries to TextField. 
autoGeneratePhraseQueries="true" (the default) causes the query parser to generate phrase queries if multiple tokens are generated from a single @@ -245,78 +402,51 @@ New Features * SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers) -* SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing. - (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, - Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, - Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald, - Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, - Harish Agarwal, yonik) - -* SOLR-1316: Create autosuggest component. - (Ankul Garg, Jason Rutherglen, Shalin Shekhar Mangar, gsingers, Robert Muir, ab) +* SOLR-2049: Add hl.multiValuedSeparatorChar for FastVectorHighlighter, per LUCENE-2603. (koji) * SOLR-2059: Add "types" attribute to WordDelimiterFilterFactory, which allows you to customize how WordDelimiterFilter tokenizes text with a configuration file. (Peter Karich, rmuir) -* SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik) - * SOLR-2099: Add ability to throttle rsync based replication using rsync option --bwlimit. (Brandon Evans via koji) -* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful - when generating filter queries from terms returned from field faceting or - the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik) +* SOLR-1316: Create autosuggest component. + (Ankul Garg, Jason Rutherglen, Shalin Shekhar Mangar, Grant Ingersoll, Robert Muir, ab) -* SOLR-2001: The query component will substitute an empty query that matches - no documents if the query parser returns null. This also prevents an - exception from being thrown by the default parser if "q" is missing. (yonik) - -* SOLR-2112: Solrj API now supports streaming results. (ryan) - -* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting - (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan) +* SOLR-1568: Added "native" filtering support for PointType, GeohashField. Added LatLonType with filtering support too. See + http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial. + Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers) * SOLR-2128: Full parameter substitution for function queries. Example: q=add($v1,$v2)&v1=mul(popularity,5)&v2=20.0 (yonik) -* SOLR-2133: Function query parser can now parse multiple comma separated +* SOLR-2133: Function query parser can now parse multiple coma separated value sources. It also now fails if there is extra unexpected text after parsing the functions, instead of silently ignoring it. This allows expressions like q=dist(2,vector(1,2),$pt)&pt=3,4 (yonik) -* LUCENE-2507: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker - to retrieve correction candidates directly from the term dictionary using - levenshtein automata. (rmuir) +* SOLR-2157: Suggester should return alpha-sorted results when onlyMorePopular=false (ab) * SOLR-2010: Added ability to verify that spell checking collations have actual results in the index. (James Dyer via gsingers) - -* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper, - built-in load balancing, and infrastructure for future SolrCloud work.
- (yonik, Mark Miller) - -* SOLR-2210: Add icu-based tokenizer and filters to contrib/analysis-extras (rmuir) - -* SOLR-1336: Add SmartChinese (word segmentation for Simplified Chinese) - tokenizer and filters to contrib/analysis-extras (rmuir) - -* SOLR-2211,LUCENE-2763: Added UAX29URLEmailTokenizerFactory, which implements - UAX#29, a unicode algorithm with good results for most languages, as well as - URL and E-mail tokenization according to the relevant RFCs. - (Tom Burton-West via rmuir) - -* SOLR-2237: Added StempelPolishStemFilterFactory to contrib/analysis-extras (rmuir) - -* SOLR-1729: Evaluation of NOW for date math is done only once per request for - consistency, and is also propagated to shards in distributed search. - Adding a parameter NOW= to the request will override the - current time. (Peter Sturge, yonik) * SOLR-2188: Added "maxTokenLength" argument to the factories for ClassicTokenizer, StandardTokenizer, and UAX29URLEmailTokenizer. (Steven Rowe) +* SOLR-2129: Added a Solr module for dynamic metadata extraction/indexing with Apache UIMA. + See contrib/uima/README.txt for more information. (Tommaso Teofili via rmuir) + +* SOLR-2325: Allow tagging and exclusion of main query for faceting. (yonik) + +* SOLR-2263: Add ability for RawResponseWriter to stream binary files as well as + text files. (Eric Pugh via yonik) + +* SOLR-860: Add debug output for MoreLikeThis. (koji) + +* SOLR-1057: Add PathHierarchyTokenizerFactory. (ryan, koji) + Optimizations ---------------------- @@ -326,39 +456,17 @@ Optimizations * SOLR-1874: Optimize PatternReplaceFilter for better performance. (rmuir, uschindler) -* SOLR-1875: Per-segment field faceting for single valued string fields. - Enable with facet.method=fcs, control the number of threads used with - the "threads" local param on the facet.field param. This algorithm will - only be faster in the presence of rapid index changes. (yonik) - -* SOLR-1904: When facet.enum.cache.minDf > 0 and the base doc set is a - SortedIntSet, convert to HashDocSet for better performance. (yonik) - -* SOLR-1843: A new "rootName" attribute is now available when - configuring in solrconfig.xml. If this attribute is set, - Solr will use it as the root name for all MBeans Solr exposes via - JMX. The default root name is "solr" followed by the core name. - (Constantijn Visinescu, hossman) - * SOLR-1968: speed up initial filter cache population for facet.method=enum and also big terms for multi-valued facet.method=fc. The resulting speedup for the first facet request is anywhere from 30% to 32x, depending on how many - terms are in the field and how many documents match per term. (yonik) + terms are in the field and how many documents match per term. (yonik) * SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for multi-valued fields) when facet.limit is both high, and a high enough percentage of the number of unique terms in the field. Extreme cases yield speedups over 3x. (yonik) -* SOLR-2046: Simplify legacy replication scripts by adding common functions - to scripts-util. (koji) - -* SOLR-2092: Speed up single-valued and multi-valued "fc" faceting. Typical - improvement is 5%, but can be much greater (up to 10x faster) when facet.offset - is very large (deep paging). (yonik) - -* SOLR-2200: Improve the performance of DataImportHandler for large delta-import - updates. (Mark Waddle via rmuir) +* SOLR-2046: add common functions to scripts-util.
(koji) Bug Fixes ---------------------- @@ -436,8 +544,9 @@ Bug Fixes * SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that could halt the streaming of documents. The original patch to fix this (never officially released) introduced another hanging bug due to - connections not being released. (Attila Babo, Erik Hetzner via yonik) - + connections not being released. + (Attila Babo, Erik Hetzner, Johannes Tuchscherer via yonik) + * SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers retrieved from ContentStreams are not closed in various places, resulting in file descriptor leaks. @@ -470,19 +579,6 @@ Bug Fixes * SOLR-1706: fixed WordDelimiterFilter for certain combinations of options where it would output incorrect tokens. (Robert Muir, Chris Male) -* SOLR-1902: Exposed SolrResourceLoader's class loader for use by Tika - -* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on - invalid config. Specificly: a signatureField that does not exist, - or overwriteDupes=true with a signatureField that is not indexed. - (hossman) - -* SOLR-1824: IndexSchema will now fail to initialize if there is a - problem initializing one of the fields or field types. (hossman) - -* SOLR-1928: TermsComponent didn't correctly break ties for non-text - fields sorted by count. (yonik) - * SOLR-1936: The JSON response format needed to escape unicode code point U+2028 - 'LINE SEPARATOR' (Robert Hofstra, yonik) @@ -520,7 +616,9 @@ Bug Fixes to be removed before it was finished being copied. This did not affect normal master/slave replication. (Peter Sturge via yonik) -* SOLR-2107: MoreLikeThisHandler doesn't work with alternate qparsers. (yonik) +* SOLR-2114: Fixed parsing error in hsin function. The function signature has changed slightly. (gsingers) + +* SOLR-2083: SpellCheckComponent misreports suggestions when distributed (James Dyer via gsingers) * SOLR-2111: Change exception handling in distributed faceting to work more like non-distributed faceting, change facet_counts/exception from a String @@ -532,29 +630,20 @@ Bug Fixes substitution/dereferencing. Properly encode local params in distributed faceting. (yonik) -* SOLR-2083: SpellCheckComponent misreports suggestions when distributed (James Dyer via gsingers) - -* SOLR-2108: Fixed false positives when using wildcard queries on fields with reversed - wildcard support. For example, a query of *zemog* would match documents that contain - 'gomez'. (Landon Kuhn via Robert Muir) - * SOLR-2135: Fix behavior of ConcurrentLRUCache when asking for getLatestAccessedItems(0) or getOldestAccessedItems(0). (David Smiley via hossman) * SOLR-2148: Highlighter doesn't support q.alt. (koji) -* SOLR-2157 Suggester should return alpha-sorted results when onlyMorePopular=false (ab) - * SOLR-1794: Dataimport of CLOB fields fails when getCharacterStream() is defined in a superclass. (Gunnar Gauslaa Bergem via rmuir) +* SOLR-2180: It was possible for EmbeddedSolrServer to leave searchers + open if a request threw an exception. (yonik) + * SOLR-2173: Suggester should always rebuild Lookup data if Lookup.load fails. (ab) -* SOLR-2190: change xpath from RSS 0.9 to 1.0 in slashdot sample. 
(koji) - -* SOLR-1962: SolrCore#initIndex should not use a mix of indexPath and newIndexPath (Mark Miller) - * SOLR-2057: DataImportHandler never calls UpdateRequestProcessor.finish() (Drew Farris via koji) @@ -564,9 +653,10 @@ Bug Fixes true/on/yes (for TRUE) and false/off/no (for FALSE) can be used for sub-options (debug, verbose, synchronous, commit, clean, optimize) for full/delta-import commands. (koji) -* SOLR-2252: When a child entity in nested entities is rootEntity="true", delta-import doesn't work. - (koji) - +* SOLR-2081: BaseResponseWriter.isStreamingDocs causes + SingleResponseWriter.end to be called 2x + (Chris A. Mattmann via hossman) + * SOLR-2219: The init() method of every SolrRequestHandler was being called twice. (ambikeshwar singh and hossman) @@ -576,8 +666,18 @@ Bug Fixes addresses "commitWithin" option on Update requests. (noble, hossman, and Maxim Valyanskiy) -* SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace - (Erick Erickson via hossman) +* SOLR-2261: fix velocity template layout.vm that referred to an older + version of jquery. (Eric Pugh via rmuir) + +* SOLR-1983: snappuller fails when modifiedConfFiles is not empty and + full copy of index is needed. (Alexander Kanarsky via yonik) + +* SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy + (Jayendra Patil via yonik) + +* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and + DocumentAnalysisRequestHandler to respect charset from XML file and only + use HTTP header's "Content-Type" as a "hint". (Uwe Schindler) Other Changes ---------------------- @@ -593,6 +693,8 @@ Other Changes * SOLR-1592: Refactor XMLWriter startTag to allow arbitrary attributes to be written (Chris A. Mattmann via noble) +* SOLR-1561: Added Lucene 2.9.1 spatial contrib jar to lib. (gsingers) + * SOLR-1570: Log warnings if uniqueKey is multi-valued or not stored (hossman, shalin) * SOLR-1558: QueryElevationComponent only works if the uniqueKey field is @@ -645,29 +747,15 @@ Other Changes "defaultSearchField" as the default value for the "qf" param instead of failing with an error when "qf" is not specified. (hossman) -* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag. - (Chris Male via rmuir) - * SOLR-1851: luceneAutoCommit no longer has any effect - it has been remove (Mark Miller) * SOLR-1865: SolrResourceLoader.getLines ignores Byte Order Markers (BOMs) at the beginning of input files, these are often created by editors such as Windows Notepad. (rmuir, hossman) -* SOLR-1846: Eliminate support for the abortOnConfigurationError - option. It has never worked very well, and in recent versions of - Solr hasn't worked at all. (hossman) - * SOLR-1938: ElisionFilterFactory will use a default set of French contractions - if you do not supply a custom articles file. (rmuir) - -* SOLR-1889: The default logic for the 'mm' param of DismaxQParser and - ExtendedDismaxQParser has been changed to be determined based on the - effective value of the 'q.op' param (hossman) - -* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system - (hossman) - + if you do not supply a custom articles file. 
(rmuir) + * SOLR-2003: SolrResourceLoader will report any encoding errors, rather than silently using replacement characters for invalid inputs (blargy via rmuir) @@ -683,8 +771,12 @@ Other Changes * SOLR-2213: Upgrade to jQuery 1.4.3 (Erick Erickson via ryan) -* SOLR-2289: Tweak spatial coords for example docs so they are a bit - more spread out (Erick Erickson via hossman) +* SOLR-1826: Add unit tests for highlighting with termOffsets=true + and overlapping tokens. (Stefan Oestreicher via rmuir) + +* SOLR-2340: Add version infos to message in JavaBinCodec when throwing + exception. (koji) + Build ---------------------- @@ -698,6 +790,10 @@ Build * SOLR-2042: Fixed some Maven deps (Drew Farris via gsingers) +* LUCENE-2657: Switch from using Maven POM templates to full POMs when + generating Maven artifacts (Steven Rowe) + + Documentation ---------------------- @@ -707,9 +803,6 @@ Documentation * SOLR-1792: Documented peculiar behavior of TestHarness.LocalRequestFactory (hossman) -* SOLR-2232: Improved README info on solr.solr.home in examples - (Eric Pugh and hossman) - ================== Release 1.4.0 ================== Release Date: See http://lucene.apache.org/solr for the official release date. diff --git a/solr/NOTICE.txt b/solr/NOTICE.txt index 10632d30b19..fa2fc0556c7 100644 --- a/solr/NOTICE.txt +++ b/solr/NOTICE.txt @@ -156,7 +156,6 @@ This product includes software developed by the Carrot2 Project. See http://project.carrot2.org/ - ========================================================================= == Guava Notice == ========================================================================= @@ -167,6 +166,16 @@ This product includes software developed by the Google Guava project. See http://code.google.com/p/guava-libraries/ +========================================================================= +== Prettify Notice == +========================================================================= + +Copyright ???? Google, Inc. + +This product includes software developed by the Google Prettify project. 
+ +See http://code.google.com/p/google-code-prettify/ + ========================================================================= == Jackson Notice == ========================================================================= diff --git a/solr/build.xml b/solr/build.xml index 9304b565654..d8021276039 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -18,6 +18,8 @@ --> + + @@ -56,7 +58,7 @@ - + @@ -216,15 +218,17 @@ - + + - + + @@ -338,7 +342,7 @@ - + @@ -346,7 +350,7 @@ - + @@ -514,6 +518,7 @@ + @@ -613,10 +618,14 @@ - + + + + - + + @@ -714,7 +725,7 @@ - + + excludes="lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/ src/site/build/ **/target/ client/ruby/flare/ client/python contrib/**/build/ **/*.iml **/*.ipr **/*.iws contrib/clustering/example/lib/** contrib/clustering/lib/downloads/** contrib/analysis-extras/lib/** contrib/uima/lib/**" /> @@ -811,29 +822,20 @@ + + + + + - - - - - - - - - - + - - - - - @@ -846,6 +848,7 @@ + - + - + - + - @@ -879,63 +882,67 @@ - - - - - - - - + - + + + + + + + + + + + + + + - - - - - + - - - - @@ -951,6 +958,8 @@ description="runs the tasks over src/java excluding the license directory"> + + @@ -958,12 +967,16 @@ + + + + diff --git a/solr/common-build.xml b/solr/common-build.xml index b63ba6e1a54..966607530b1 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -33,6 +33,9 @@ + + + @@ -138,8 +141,8 @@ - @@ -270,6 +273,7 @@ includeAntRuntime="${javac.includeAntRuntime}" sourcepath="" classpathref="@{classpathref}"> + @@ -343,6 +347,9 @@ + + + @@ -367,6 +375,10 @@ +
    + + ]]>
    @@ -377,19 +389,34 @@ - - + + + + + + + + + + + + + + + + + + - + - - + @@ -437,6 +464,17 @@ + + + + + + + + + + + diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java b/solr/contrib/analysis-extras/CHANGES.txt similarity index 100% rename from lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java rename to solr/contrib/analysis-extras/CHANGES.txt diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml index 2babe1a7e96..1b135e3c4af 100644 --- a/solr/contrib/analysis-extras/build.xml +++ b/solr/contrib/analysis-extras/build.xml @@ -73,7 +73,7 @@ - + diff --git a/solr/contrib/clustering/CHANGES.txt b/solr/contrib/clustering/CHANGES.txt index 80ae3136914..2bf66fa5f01 100644 --- a/solr/contrib/clustering/CHANGES.txt +++ b/solr/contrib/clustering/CHANGES.txt @@ -7,8 +7,11 @@ See http://wiki.apache.org/solr/ClusteringComponent CHANGES $Id$ +================== Release 4.0.0-dev ================== -================== Release XXXX ================== +(No Changes) + +================== Release 3.1.0-dev ================== * SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set) method b/c it can use the document cache (gsingers) @@ -18,9 +21,9 @@ $Id$ This release of C2 also does not have a specific Lucene dependency. (Stanislaw Osinski, gsingers) * SOLR-2282: Add distributed search support for search result clustering. - (Brad Giaccio, koji) + (Brad Giaccio, Dawid Weiss, Stanislaw Osinski, rmuir, koji) ================== Release 1.4.0 ================== Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent - for details on using. \ No newline at end of file + for details on using. diff --git a/solr/contrib/clustering/DISABLED-README.txt b/solr/contrib/clustering/DISABLED-README.txt deleted file mode 100644 index 8a43dbda63d..00000000000 --- a/solr/contrib/clustering/DISABLED-README.txt +++ /dev/null @@ -1,7 +0,0 @@ -In trunk this contrib module is currently disabled, as it uses the external -(binary) Carrot2 library (as trunk is free to change its API, this module fails -with linking exceptions). - -After a stable branch of Lucene is created from trunk, rename -'build.xml.disabled' back to 'build.xml' after replacing the Carrot2 -JARs by updated versions. diff --git a/solr/contrib/clustering/build.xml b/solr/contrib/clustering/build.xml index a8036428c9a..0621df640d3 100644 --- a/solr/contrib/clustering/build.xml +++ b/solr/contrib/clustering/build.xml @@ -42,7 +42,7 @@ - + diff --git a/solr/contrib/clustering/build.xml.disabled b/solr/contrib/clustering/build.xml.disabled deleted file mode 100644 index c5cb3af4548..00000000000 --- a/solr/contrib/clustering/build.xml.disabled +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - - - - - Clustering Integraton - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Tests failed! - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/contrib/clustering/lib/carrot2-core-3.4.0.jar b/solr/contrib/clustering/lib/carrot2-core-3.4.0.jar deleted file mode 100644 index a09b28ac477..00000000000 --- a/solr/contrib/clustering/lib/carrot2-core-3.4.0.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[96c3bdbdaacd5289b0e654842e435689fbcf22e2] was removed in git history. -Apache SVN contains full history. 
\ No newline at end of file diff --git a/solr/contrib/clustering/lib/carrot2-core-3.4.2.jar b/solr/contrib/clustering/lib/carrot2-core-3.4.2.jar new file mode 100644 index 00000000000..6b8fdb5699c --- /dev/null +++ b/solr/contrib/clustering/lib/carrot2-core-3.4.2.jar @@ -0,0 +1,2 @@ +AnyObjectId[f872cbc8eec94f7d5b29a73f99cd13089848a3cd] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java index 5996b60c73f..0259bb80b62 100644 --- a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java +++ b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java @@ -46,6 +46,13 @@ import java.util.Set; *

    * This engine is experimental. Output from this engine is subject to change in future releases. * + *

    + * <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering">
    + *   <lst name="engine">
    + *     <str name="name">default</str>
    + *     <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
    + *   </lst>
    + * </searchComponent>
    */ public class ClusteringComponent extends SearchComponent implements SolrCoreAware { private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class); diff --git a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index 55f38515997..b0cb1981d89 100644 --- a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -77,6 +77,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { private String idFieldName; + @Override @Deprecated public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) { SolrIndexSearcher searcher = sreq.getSearcher(); @@ -90,6 +91,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { } } + @Override public Object cluster(Query query, SolrDocumentList solrDocList, Map docIds, SolrQueryRequest sreq) { try { @@ -124,7 +126,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { // is included in the code base of Solr, so that it's possible to refactor // the Lucene APIs the factory relies on if needed. initAttributes.put("PreprocessingPipeline.languageModelFactory", - new LuceneLanguageModelFactory()); + LuceneLanguageModelFactory.class); this.controller.init(initAttributes); this.idFieldName = core.getSchema().getUniqueKeyField().getName(); diff --git a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java index d7b2ace1f6b..857fccf48f3 100644 --- a/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java +++ b/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.carrot2.core.LanguageCode; import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer; import org.carrot2.text.analysis.ITokenizer; -import org.carrot2.text.linguistic.BaseLanguageModelFactory; +import org.carrot2.text.linguistic.DefaultLanguageModelFactory; import org.carrot2.text.linguistic.IStemmer; import org.carrot2.text.linguistic.IdentityStemmer; import org.carrot2.text.util.MutableCharArray; @@ -62,14 +62,15 @@ import org.tartarus.snowball.ext.TurkishStemmer; * change, the changes can be made in this class. */ @Bindable(prefix = "DefaultLanguageModelFactory") -public class LuceneLanguageModelFactory extends BaseLanguageModelFactory { +public class LuceneLanguageModelFactory extends DefaultLanguageModelFactory { final static Logger logger = org.slf4j.LoggerFactory .getLogger(LuceneLanguageModelFactory.class); /** * Provide an {@link IStemmer} implementation for a given language. 
*/ - protected IStemmer createStemmer(LanguageCode language) { + @Override + protected IStemmer createStemmer(LanguageCode language) { switch (language) { case ARABIC: return ArabicStemmerFactory.createStemmer(); @@ -200,7 +201,7 @@ public class LuceneLanguageModelFactory extends BaseLanguageModelFactory { logger .warn( "Could not instantiate Lucene stemmer for Arabic, clustering quality " - + "of Chinese content may be degraded. For best quality clusters, " + + "of Arabic content may be degraded. For best quality clusters, " + "make sure Lucene's Arabic analyzer JAR is in the classpath", e); } diff --git a/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java b/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java index 1b815b2cb0d..758d829f7cd 100644 --- a/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java +++ b/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java @@ -20,12 +20,15 @@ package org.apache.solr.handler.clustering; import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.common.params.CommonParams; -import org.junit.Ignore; - -@Ignore("FIXME: test fails on hudson") public class DistributedClusteringComponentTest extends BaseDistributedSearchTestCase { + @Override + public String getSolrHome() { + // TODO: this should work with just "solr-clustering"... + return getFile("solr-clustering").getAbsolutePath(); + } + @Override public void doTest() throws Exception { del("*:*"); diff --git a/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java b/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java index 90f0ab73e5a..77b3fcfba06 100644 --- a/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java +++ b/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java @@ -25,11 +25,13 @@ import org.apache.solr.search.DocSet; * **/ public class MockDocumentClusteringEngine extends DocumentClusteringEngine { + @Override public NamedList cluster(DocSet docs, SolrParams solrParams) { NamedList result = new NamedList(); return result; } + @Override public NamedList cluster(SolrParams solrParams) { NamedList result = new NamedList(); return result; diff --git a/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml b/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml index c59cc9b63fd..958848535ed 100644 --- a/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml +++ b/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml @@ -428,13 +428,6 @@ --> - - - - diff --git a/solr/contrib/dataimporthandler/CHANGES.txt b/solr/contrib/dataimporthandler/CHANGES.txt index 83b022e194c..3fbb1c23493 100644 --- a/solr/contrib/dataimporthandler/CHANGES.txt +++ b/solr/contrib/dataimporthandler/CHANGES.txt @@ -8,7 +8,11 @@ HTTP data sources quick and easy. 
$Id$ -================== 1.5.0-dev ================== +================== 4.0.0-dev ================== + +(No Changes) + +================== 3.1.0-dev ================== Upgrading from Solr 1.4 ---------------------- @@ -44,6 +48,9 @@ New Features Optimizations ---------------------- +* SOLR-2200: Improve the performance of DataImportHandler for large delta-import + updates. (Mark Waddle via rmuir) + Bug Fixes ---------------------- * SOLR-1638: Fixed NullPointerException during import if uniqueKey is not specified @@ -65,9 +72,19 @@ Bug Fixes * SOLR-1811: formatDate should use the current NOW value always (Sean Timm via noble) +* SOLR-2310: getTimeElapsedSince() returns incorrect hour value when the elapse is over 60 hours + (tom liu via koji) + +* SOLR-2252: When a child entity in nested entities is rootEntity="true", delta-import doesn't work. + (koji) + +* SOLR-2330: solrconfig.xml files in example-DIH are broken. (Matt Parker, koji) + Other Changes ---------------------- +* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag. + (Chris Male via rmuir) Build ---------------------- diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/solr/contrib/dataimporthandler/README.txt similarity index 100% rename from lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java rename to solr/contrib/dataimporthandler/README.txt diff --git a/solr/contrib/dataimporthandler/build.xml b/solr/contrib/dataimporthandler/build.xml index c21c0f9a648..7772fcbb4e8 100644 --- a/solr/contrib/dataimporthandler/build.xml +++ b/solr/contrib/dataimporthandler/build.xml @@ -56,7 +56,7 @@ - +
    @@ -68,7 +68,7 @@ - +
    diff --git a/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template b/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template deleted file mode 100644 index 807681d7dc1..00000000000 --- a/solr/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @version@ - - - org.apache.solr - solr-dataimporthandler - Apache Solr DataImportHandler - @version@ - Apache Solr DataImportHandler - jar - - diff --git a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java index 5b9374c744a..0231d1ec7e3 100644 --- a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java @@ -51,6 +51,7 @@ public class MailEntityProcessor extends EntityProcessorBase { public SearchTerm getCustomSearch(Folder folder); } + @Override public void init(Context context) { super.init(context); // set attributes using XXX getXXXFromContext(attribute, defualtValue); @@ -95,6 +96,7 @@ public class MailEntityProcessor extends EntityProcessorBase { logConfig(); } + @Override public Map nextRow() { Message mail; Map row = null; diff --git a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java index 4e4ad41afbe..b7b31ab5abb 100644 --- a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java @@ -92,6 +92,7 @@ public class TikaEntityProcessor extends EntityProcessorBase { done = false; } + @Override public Map nextRow() { if(done) return null; Map row = new HashMap(); diff --git a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java index 8a1f1083d4e..2ac19b32192 100644 --- a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java @@ -191,18 +191,22 @@ public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase { super(null, ".", null); } + @Override public boolean upload(SolrInputDocument doc) { return docs.add(doc); } + @Override public void log(int event, String name, Object row) { // Do nothing } + @Override public void doDeleteAll() { deleteAllCalled = Boolean.TRUE; } + @Override public void commit(boolean b) { commitCalled = Boolean.TRUE; } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java index 221d8eacbc7..1187f65e92d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java +++ 
b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java @@ -39,10 +39,12 @@ public class BinContentStreamDataSource extends DataSource { private InputStream in; + @Override public void init(Context context, Properties initProps) { this.context = (ContextImpl) context; } + @Override public InputStream getData(String query) { contentStream = context.getDocBuilder().requestParameters.contentStream; if (contentStream == null) @@ -55,6 +57,7 @@ public class BinContentStreamDataSource extends DataSource { } } + @Override public void close() { if (contentStream != null) { try { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java index 4d4cdebc62b..e473ab8da32 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java @@ -43,10 +43,12 @@ import java.util.Properties; public class BinFileDataSource extends DataSource{ protected String basePath; + @Override public void init(Context context, Properties initProps) { basePath = initProps.getProperty(FileDataSource.BASE_PATH); } + @Override public InputStream getData(String query) { File f = FileDataSource.getFile(basePath,query); try { @@ -57,6 +59,7 @@ public class BinFileDataSource extends DataSource{ } } + @Override public void close() { } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java index 9d4d879c2ce..045d6fa3bd0 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java @@ -49,6 +49,7 @@ public class BinURLDataSource extends DataSource{ public BinURLDataSource() { } + @Override public void init(Context context, Properties initProps) { this.context = context; this.initProps = initProps; @@ -72,6 +73,7 @@ public class BinURLDataSource extends DataSource{ } } + @Override public InputStream getData(String query) { URL url = null; try { @@ -89,6 +91,7 @@ public class BinURLDataSource extends DataSource{ } } + @Override public void close() { } private String getInitPropWithReplacements(String propertyName) { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java index 69b7b2b6f18..864e772288e 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java @@ -38,6 +38,7 @@ import java.util.Map; public class CachedSqlEntityProcessor extends SqlEntityProcessor { private boolean isFirst; + @Override @SuppressWarnings("unchecked") public void init(Context context) { super.init(context); @@ -45,6 +46,7 @@ public class CachedSqlEntityProcessor extends SqlEntityProcessor { isFirst = true; } + @Override public Map nextRow() { if (dataSourceRowCache != null) return getFromRowCacheTransformed(); @@ 
-60,6 +62,7 @@ public class CachedSqlEntityProcessor extends SqlEntityProcessor { } + @Override protected List> getAllNonCachedRows() { List> rows = new ArrayList>(); String q = getQuery(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java index 5ebd3baa5dd..ae970d25fdb 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java @@ -37,6 +37,7 @@ import java.util.Map; * @since solr 1.4 */ public class ClobTransformer extends Transformer { + @Override public Object transformRow(Map aRow, Context context) { for (Map map : context.getAllEntityFields()) { if (!TRUE.equals(map.get(CLOB))) continue; diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java index 3b55fd6cf5e..58ed19ed595 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java @@ -39,10 +39,12 @@ public class ContentStreamDataSource extends DataSource { private ContentStream contentStream; private Reader reader; + @Override public void init(Context context, Properties initProps) { this.context = (ContextImpl) context; } + @Override public Reader getData(String query) { contentStream = context.getDocBuilder().requestParameters.contentStream; if (contentStream == null) @@ -55,6 +57,7 @@ public class ContentStreamDataSource extends DataSource { } } + @Override public void close() { if (contentStream != null) { try { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java index 6dfa48276c9..bd726835e2d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java @@ -71,22 +71,27 @@ public class ContextImpl extends Context { parent = parentContext; } + @Override public String getEntityAttribute(String name) { return entity == null ? null : entity.allAttributes.get(name); } + @Override public String getResolvedEntityAttribute(String name) { return entity == null ? null : resolver.replaceTokens(entity.allAttributes.get(name)); } + @Override public List> getAllEntityFields() { return entity == null ? 
Collections.EMPTY_LIST : entity.allFieldsList; } + @Override public VariableResolver getVariableResolver() { return resolver; } + @Override public DataSource getDataSource() { if (ds != null) return ds; if(entity == null) return null; @@ -101,26 +106,32 @@ public class ContextImpl extends Context { return entity.dataSrc; } + @Override public DataSource getDataSource(String name) { return dataImporter.getDataSourceInstance(entity, name, this); } + @Override public boolean isRootEntity() { return entity.isDocRoot; } + @Override public String currentProcess() { return currProcess; } + @Override public Map getRequestParameters() { return requestParams; } + @Override public EntityProcessor getEntityProcessor() { return entity == null ? null : entity.processor; } + @Override public void setSessionAttribute(String name, Object val, String scope) { if(name == null) return; if (Context.SCOPE_ENTITY.equals(scope)) { @@ -148,6 +159,7 @@ public class ContextImpl extends Context { else entitySession.put(name, val); } + @Override public Object getSessionAttribute(String name, String scope) { if (Context.SCOPE_ENTITY.equals(scope)) { if (entitySession == null) @@ -166,6 +178,7 @@ public class ContextImpl extends Context { return null; } + @Override public Context getParentContext() { return parent; } @@ -187,15 +200,18 @@ public class ContextImpl extends Context { } + @Override public SolrCore getSolrCore() { return dataImporter == null ? null : dataImporter.getCore(); } + @Override public Map getStats() { return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.emptyMap(); } + @Override public String getScript() { if(dataImporter != null) { DataConfig.Script script = dataImporter.getConfig().script; @@ -204,6 +220,7 @@ public class ContextImpl extends Context { return null; } + @Override public String getScriptLanguage() { if (dataImporter != null) { DataConfig.Script script = dataImporter.getConfig().script; @@ -212,12 +229,14 @@ public class ContextImpl extends Context { return null; } + @Override public void deleteDoc(String id) { if(docBuilder != null){ docBuilder.writer.deleteDoc(id); } } + @Override public void deleteDocByQuery(String query) { if(docBuilder != null){ docBuilder.writer.deleteByQuery(query); @@ -227,10 +246,12 @@ public class ContextImpl extends Context { DocBuilder getDocBuilder(){ return docBuilder; } + @Override public Object resolve(String var) { return resolver.resolve(var); } + @Override public String replaceTokens(String template) { return resolver.replaceTokens(template); } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java index f548ff648a9..f0a9e412427 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java @@ -214,6 +214,7 @@ public class DataConfig { public Map allAttributes = new HashMap() { + @Override public String put(String key, String value) { if (super.containsKey(key)) return super.get(key); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java index 6d9206d43ed..45f8fcf1598 100644 --- 
a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java @@ -423,6 +423,7 @@ public class DataImporter { } static final ThreadLocal QUERY_COUNT = new ThreadLocal() { + @Override protected AtomicLong initialValue() { return new AtomicLong(); } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java index e2c6e221095..95c601e8911 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java @@ -45,6 +45,7 @@ public class DateFormatTransformer extends Transformer { private static final Logger LOG = LoggerFactory .getLogger(DateFormatTransformer.class); + @Override @SuppressWarnings("unchecked") public Object transformRow(Map aRow, Context context) { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java index 77c1ea7669e..8ee0126dc25 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java @@ -60,6 +60,7 @@ class DebugLogger { output = new NamedList(); debugStack = new Stack() { + @Override public DebugInfo pop() { if (size() == 1) throw new DataImportHandlerException( @@ -169,14 +170,17 @@ class DebugLogger { DataSource wrapDs(final DataSource ds) { return new DataSource() { + @Override public void init(Context context, Properties initProps) { ds.init(context, initProps); } + @Override public void close() { ds.close(); } + @Override public Object getData(String query) { writer.log(SolrWriter.ENTITY_META, "query", query); long start = System.currentTimeMillis(); @@ -203,6 +207,7 @@ class DebugLogger { Transformer wrapTransformer(final Transformer t) { return new Transformer() { + @Override public Object transformRow(Map row, Context context) { writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row); String tName = getTransformerName(t); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java index 86afe4af433..42bf6da9499 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java @@ -139,6 +139,7 @@ public class DocBuilder { document = dataImporter.getConfig().document; final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); statusMessages.put(TIME_ELAPSED, new Object() { + @Override public String toString() { return getTimeElapsedSince(startTime.get()); } @@ -949,7 +950,7 @@ public class DocBuilder { static String getTimeElapsedSince(long l) { l = System.currentTimeMillis() - l; - return (l / (60000 * 60)) % 60 + ":" + (l / 60000) % 60 + ":" + (l / 1000) + return (l / (60000 * 60)) + ":" + (l / 60000) % 60 + ":" + (l / 1000) % 60 + "." 
+ l % 1000; } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java index c2294bd6549..5d761194440 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java @@ -49,6 +49,7 @@ public class EntityProcessorBase extends EntityProcessor { protected String onError = ABORT; + @Override public void init(Context context) { rowIterator = null; this.context = context; @@ -86,14 +87,17 @@ public class EntityProcessorBase extends EntityProcessor { } } + @Override public Map nextModifiedRowKey() { return null; } + @Override public Map nextDeletedRowKey() { return null; } + @Override public Map nextModifiedParentRowKey() { return null; } @@ -105,11 +109,13 @@ public class EntityProcessorBase extends EntityProcessor { * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return * null to signal end of rows */ + @Override public Map nextRow() { return null;// do not do anything } + @Override public void destroy() { /*no op*/ } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java index c46ddcf9e6d..432e64ac767 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java @@ -54,6 +54,7 @@ public class EntityProcessorWrapper extends EntityProcessor { this.docBuilder = docBuilder; } + @Override public void init(Context context) { rowcache = null; this.context = context; @@ -79,6 +80,7 @@ public class EntityProcessorWrapper extends EntityProcessor { String[] transArr = transClasses.split(","); transformers = new ArrayList() { + @Override public boolean add(Transformer transformer) { if (docBuilder != null && docBuilder.verboseDebug) { transformer = docBuilder.writer.getDebugLogger().wrapTransformer(transformer); @@ -135,6 +137,7 @@ public class EntityProcessorWrapper extends EntityProcessor { o = clazz.newInstance(); } + @Override public Object transformRow(Map aRow, Context context) { try { return meth.invoke(o, aRow); @@ -223,6 +226,7 @@ public class EntityProcessorWrapper extends EntityProcessor { && Boolean.parseBoolean(oMap.get("$stopTransform").toString()); } + @Override public Map nextRow() { if (rowcache != null) { return getFromRowCache(); @@ -252,6 +256,7 @@ public class EntityProcessorWrapper extends EntityProcessor { } } + @Override public Map nextModifiedRowKey() { Map row = delegate.nextModifiedRowKey(); row = applyTransformer(row); @@ -259,6 +264,7 @@ public class EntityProcessorWrapper extends EntityProcessor { return row; } + @Override public Map nextDeletedRowKey() { Map row = delegate.nextDeletedRowKey(); row = applyTransformer(row); @@ -266,10 +272,12 @@ public class EntityProcessorWrapper extends EntityProcessor { return row; } + @Override public Map nextModifiedParentRowKey() { return delegate.nextModifiedParentRowKey(); } + @Override public void destroy() { delegate.destroy(); } diff --git 
a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java index 9c4321a97d7..24e728d82c0 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java @@ -66,6 +66,7 @@ public class EvaluatorBag { */ public static Evaluator getSqlEscapingEvaluator() { return new Evaluator() { + @Override public String evaluate(String expression, Context context) { List l = parseParams(expression, context.getVariableResolver()); if (l.size() != 1) { @@ -90,6 +91,7 @@ public class EvaluatorBag { */ public static Evaluator getSolrQueryEscapingEvaluator() { return new Evaluator() { + @Override public String evaluate(String expression, Context context) { List l = parseParams(expression, context.getVariableResolver()); if (l.size() != 1) { @@ -109,6 +111,7 @@ public class EvaluatorBag { */ public static Evaluator getUrlEvaluator() { return new Evaluator() { + @Override public String evaluate(String expression, Context context) { List l = parseParams(expression, context.getVariableResolver()); if (l.size() != 1) { @@ -138,6 +141,7 @@ public class EvaluatorBag { */ public static Evaluator getDateFormatEvaluator() { return new Evaluator() { + @Override public String evaluate(String expression, Context context) { List l = parseParams(expression, context.getVariableResolver()); if (l.size() != 2) { @@ -288,6 +292,7 @@ public class EvaluatorBag { } + @Override public String toString() { Object o = vr.resolve(varName); return o == null ? null : o.toString(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java index 8b2ae93c12c..b9d9ec74ab9 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java @@ -52,6 +52,7 @@ public class FieldReaderDataSource extends DataSource { private String encoding; private EntityProcessorWrapper entityProcessor; + @Override public void init(Context context, Properties initProps) { dataField = context.getEntityAttribute("dataField"); encoding = context.getEntityAttribute("encoding"); @@ -59,6 +60,7 @@ public class FieldReaderDataSource extends DataSource { /*no op*/ } + @Override public Reader getData(String query) { Object o = entityProcessor.getVariableResolver().resolve(dataField); if (o == null) { @@ -111,6 +113,7 @@ public class FieldReaderDataSource extends DataSource { } } + @Override public void close() { } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java index f92f7cb4b15..132367cc0fa 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java @@ -52,12 +52,14 @@ public class FieldStreamDataSource extends DataSource { protected String dataField; private EntityProcessorWrapper wrapper; + @Override 
public void init(Context context, Properties initProps) { dataField = context.getEntityAttribute("dataField"); wrapper = (EntityProcessorWrapper) context.getEntityProcessor(); /*no op*/ } + @Override public InputStream getData(String query) { Object o = wrapper.getVariableResolver().resolve(dataField); if (o == null) { @@ -90,6 +92,7 @@ public class FieldStreamDataSource extends DataSource { } + @Override public void close() { } } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java index 64353ef3fa4..2f5a5aa1e2e 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java @@ -59,6 +59,7 @@ public class FileDataSource extends DataSource { private static final Logger LOG = LoggerFactory.getLogger(FileDataSource.class); + @Override public void init(Context context, Properties initProps) { basePath = initProps.getProperty(BASE_PATH); if (initProps.get(URLDataSource.ENCODING) != null) @@ -79,6 +80,7 @@ public class FileDataSource extends DataSource { * returned Reader *

    */ + @Override public Reader getData(String query) { File f = getFile(basePath,query); try { @@ -130,6 +132,7 @@ public class FileDataSource extends DataSource { } } + @Override public void close() { } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java index 72924176731..7549af7dfbd 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java @@ -106,6 +106,7 @@ public class FileListEntityProcessor extends EntityProcessorBase { private Pattern fileNamePattern, excludesPattern; + @Override public void init(Context context) { super.init(context); fileName = context.getEntityAttribute(FILE_NAME); @@ -195,6 +196,7 @@ public class FileListEntityProcessor extends EntityProcessorBase { return Long.parseLong(sizeStr); } + @Override public Map nextRow() { if (rowIterator != null) return getNext(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java index cb38e480bf0..f48ca2cda86 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java @@ -54,6 +54,7 @@ public class JdbcDataSource extends private int maxRows = 0; + @Override public void init(Context context, Properties initProps) { Object o = initProps.get(CONVERT_TYPE); if (o != null) @@ -204,6 +205,7 @@ public class JdbcDataSource extends } } + @Override public Iterator> getData(String query) { ResultSetIterator r = new ResultSetIterator(query); return r.getIterator(); @@ -370,6 +372,7 @@ public class JdbcDataSource extends } } + @Override protected void finalize() throws Throwable { try { if(!isClosed){ @@ -383,6 +386,7 @@ public class JdbcDataSource extends private boolean isClosed = false; + @Override public void close() { try { closeConnection(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java index 0a8b201ab42..30e366316f7 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java @@ -64,6 +64,7 @@ public class LineEntityProcessor extends EntityProcessorBase { /** * Parses each of the entity attributes. */ + @Override public void init(Context context) { super.init(context); String s; @@ -97,6 +98,7 @@ public class LineEntityProcessor extends EntityProcessorBase { * from the url. However transformers can be used to create as * many other fields as required. 
*/ + @Override public Map nextRow() { if (reader == null) { reader = new BufferedReader((Reader) context.getDataSource().getData(url)); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java index d9d2f115d24..5a603a74049 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java @@ -35,6 +35,7 @@ import java.util.Map; public class LogTransformer extends Transformer { Logger LOG = LoggerFactory.getLogger(LogTransformer.class); + @Override public Object transformRow(Map row, Context ctx) { String expr = ctx.getEntityAttribute(LOG_TEMPLATE); String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL)); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java index 7b747d72a56..6fd7213b5a7 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java @@ -45,13 +45,16 @@ public class MockDataSource extends cache.clear(); } + @Override public void init(Context context, Properties initProps) { } + @Override public Iterator> getData(String query) { return cache.get(query); } + @Override public void close() { cache.clear(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java index d38ab75fcb0..36efecf5320 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java @@ -49,6 +49,7 @@ public class NumberFormatTransformer extends Transformer { private static final Pattern localeRegex = Pattern.compile("^([a-z]{2})-([A-Z]{2})$"); + @Override @SuppressWarnings("unchecked") public Object transformRow(Map row, Context context) { for (Map fld : context.getAllEntityFields()) { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java index 79a981875bc..2d32eee0122 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java @@ -40,11 +40,13 @@ public class PlainTextEntityProcessor extends EntityProcessorBase { private static final Logger LOG = LoggerFactory.getLogger(PlainTextEntityProcessor.class); private boolean ended = false; + @Override public void init(Context context) { super.init(context); ended = false; } + @Override public Map nextRow() { if (ended) return null; DataSource ds = context.getDataSource(); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java 
b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java index e5910093e73..429bb0cf2f3 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java @@ -43,6 +43,7 @@ import java.util.regex.Pattern; public class RegexTransformer extends Transformer { private static final Logger LOG = LoggerFactory.getLogger(RegexTransformer.class); + @Override @SuppressWarnings("unchecked") public Map transformRow(Map row, Context ctx) { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java index ba06f49b91d..547fc66cf2a 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java @@ -47,6 +47,7 @@ public class ScriptTransformer extends Transformer { private String functionName; + @Override public Object transformRow(Map row, Context context) { try { if (engine == null) diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java index 925a9569bf2..1748998720b 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java @@ -46,6 +46,7 @@ public class SqlEntityProcessor extends EntityProcessorBase { protected DataSource>> dataSource; + @Override @SuppressWarnings("unchecked") public void init(Context context) { super.init(context); @@ -65,6 +66,7 @@ public class SqlEntityProcessor extends EntityProcessorBase { } } + @Override public Map nextRow() { if (rowIterator == null) { String q = getQuery(); @@ -73,6 +75,7 @@ public class SqlEntityProcessor extends EntityProcessorBase { return getNext(); } + @Override public Map nextModifiedRowKey() { if (rowIterator == null) { String deltaQuery = context.getEntityAttribute(DELTA_QUERY); @@ -83,6 +86,7 @@ public class SqlEntityProcessor extends EntityProcessorBase { return getNext(); } + @Override public Map nextDeletedRowKey() { if (rowIterator == null) { String deletedPkQuery = context.getEntityAttribute(DEL_PK_QUERY); @@ -93,6 +97,7 @@ public class SqlEntityProcessor extends EntityProcessorBase { return getNext(); } + @Override public Map nextModifiedParentRowKey() { if (rowIterator == null) { String parentDeltaQuery = context.getEntityAttribute(PARENT_DELTA_QUERY); diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java index 8c5527983ff..6fd0665c700 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java @@ -51,6 +51,7 @@ public class TemplateTransformer extends Transformer { private static final Logger LOG = LoggerFactory.getLogger(TemplateTransformer.class); private Map> 
templateVsVars = new HashMap>(); + @Override @SuppressWarnings("unchecked") public Object transformRow(Map row, Context context) { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java index 274c120b270..234fb56f264 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java @@ -56,6 +56,7 @@ public class URLDataSource extends DataSource { public URLDataSource() { } + @Override public void init(Context context, Properties initProps) { this.context = context; this.initProps = initProps; @@ -81,6 +82,7 @@ public class URLDataSource extends DataSource { } } + @Override public Reader getData(String query) { URL url = null; try { @@ -114,6 +116,7 @@ public class URLDataSource extends DataSource { } } + @Override public void close() { } diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java index b0675cc4627..8d39dd13bde 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java @@ -91,10 +91,12 @@ public class VariableResolverImpl extends VariableResolver { container.remove(name); } + @Override public String replaceTokens(String template) { return templateString.replaceTokens(template, this); } + @Override @SuppressWarnings("unchecked") public Object resolve(String name) { if (name == null) diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java index 26196788dfe..e995fab3442 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java @@ -80,6 +80,7 @@ public class XPathEntityProcessor extends EntityProcessorBase { protected Thread publisherThread; + @Override @SuppressWarnings("unchecked") public void init(Context context) { super.init(context); @@ -171,6 +172,7 @@ public class XPathEntityProcessor extends EntityProcessorBase { } + @Override public Map nextRow() { Map result; @@ -398,6 +400,7 @@ public class XPathEntityProcessor extends EntityProcessorBase { final AtomicBoolean isEnd = new AtomicBoolean(false); final AtomicBoolean throwExp = new AtomicBoolean(true); publisherThread = new Thread() { + @Override public void run() { try { xpathReader.streamRecords(data, new XPathRecordReader.Handler() { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java index 07f78d0b412..8c3791f224a 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java +++ 
b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java @@ -197,89 +197,110 @@ public abstract class AbstractDataImportHandlerTestCase extends this.root = root; } + @Override public String getEntityAttribute(String name) { return entityAttrs == null ? delegate.getEntityAttribute(name) : entityAttrs.get(name); } + @Override public String getResolvedEntityAttribute(String name) { return entityAttrs == null ? delegate.getResolvedEntityAttribute(name) : delegate.getVariableResolver().replaceTokens(entityAttrs.get(name)); } + @Override public List> getAllEntityFields() { return entityFields == null ? delegate.getAllEntityFields() : entityFields; } + @Override public VariableResolver getVariableResolver() { return delegate.getVariableResolver(); } + @Override public DataSource getDataSource() { return delegate.getDataSource(); } + @Override public boolean isRootEntity() { return root; } + @Override public String currentProcess() { return delegate.currentProcess(); } + @Override public Map getRequestParameters() { return delegate.getRequestParameters(); } + @Override public EntityProcessor getEntityProcessor() { return null; } + @Override public void setSessionAttribute(String name, Object val, String scope) { delegate.setSessionAttribute(name, val, scope); } + @Override public Object getSessionAttribute(String name, String scope) { return delegate.getSessionAttribute(name, scope); } + @Override public Context getParentContext() { return delegate.getParentContext(); } + @Override public DataSource getDataSource(String name) { return delegate.getDataSource(name); } + @Override public SolrCore getSolrCore() { return delegate.getSolrCore(); } + @Override public Map getStats() { return delegate.getStats(); } + @Override public String getScript() { return script == null ? delegate.getScript() : script; } + @Override public String getScriptLanguage() { return scriptlang == null ? 
delegate.getScriptLanguage() : scriptlang; } + @Override public void deleteDoc(String id) { } + @Override public void deleteDocByQuery(String query) { } + @Override public Object resolve(String var) { return delegate.resolve(var); } + @Override public String replaceTokens(String template) { return delegate.replaceTokens(template); } @@ -318,31 +339,37 @@ public abstract class AbstractDataImportHandlerTestCase extends reset(); } + @Override public void finish() throws IOException { finishCalled = true; super.finish(); } + @Override public void processAdd(AddUpdateCommand cmd) throws IOException { processAddCalled = true; super.processAdd(cmd); } + @Override public void processCommit(CommitUpdateCommand cmd) throws IOException { processCommitCalled = true; super.processCommit(cmd); } + @Override public void processDelete(DeleteUpdateCommand cmd) throws IOException { processDeleteCalled = true; super.processDelete(cmd); } + @Override public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException { mergeIndexesCalled = true; super.processMergeIndexes(cmd); } + @Override public void processRollback(RollbackUpdateCommand cmd) throws IOException { rollbackCalled = true; super.processRollback(cmd); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java index ceda6edd3ec..0037d6796ca 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java @@ -158,6 +158,7 @@ public class TestCachedSqlEntityProcessor extends AbstractDataImportHandlerTestC public static class DoubleTransformer extends Transformer { + @Override public Object transformRow(Map row, Context context) { List> rows = new ArrayList>(); rows.add(row); @@ -169,6 +170,7 @@ public class TestCachedSqlEntityProcessor extends AbstractDataImportHandlerTestC public static class UppercaseTransformer extends Transformer { + @Override public Object transformRow(Map row, Context context) { for (Map.Entry entry : row.entrySet()) { Object val = entry.getValue(); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java index c49be006377..692272d4e04 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java @@ -43,6 +43,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa SolrInstance instance = null; JettySolrRunner jetty; + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -51,6 +52,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa jetty = createJetty(instance); } + @Override @After public void tearDown() throws Exception { jetty.stop(); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java index cef348493dd..39b01ddedce 100644 --- 
a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java @@ -19,9 +19,10 @@ package org.apache.solr.handler.dataimport; import org.junit.BeforeClass; import org.junit.Test; import org.w3c.dom.Document; +import org.xml.sax.InputSource; import javax.xml.parsers.DocumentBuilderFactory; -import java.io.ByteArrayInputStream; +import java.io.StringReader; import java.util.ArrayList; import java.util.List; @@ -55,7 +56,7 @@ public class TestDataConfig extends AbstractDataImportHandlerTestCase { public void testBasic() throws Exception { javax.xml.parsers.DocumentBuilder builder = DocumentBuilderFactory .newInstance().newDocumentBuilder(); - Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes())); + Document doc = builder.parse(new InputSource(new StringReader(xml))); DataConfig dc = new DataConfig(); dc.readFromXml(doc.getDocumentElement()); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java index e9947e52e76..a16b7017ab4 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java @@ -33,6 +33,7 @@ import java.util.*; */ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { + @Override @After public void tearDown() throws Exception { MockDataSource.clearCache(); @@ -200,22 +201,27 @@ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { super(null, ".",null); } + @Override public boolean upload(SolrInputDocument doc) { return docs.add(doc); } + @Override public void log(int event, String name, Object row) { // Do nothing } + @Override public void doDeleteAll() { deleteAllCalled = Boolean.TRUE; } + @Override public void commit(boolean b) { commitCalled = Boolean.TRUE; } + @Override public void finish() { finishCalled = Boolean.TRUE; } diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java index f361eb20a43..4632318fa17 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java @@ -252,6 +252,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { } public static class MockTransformer extends Transformer { + @Override public Object transformRow(Map row, Context context) { assertTrue("Context gave incorrect data source", context.getDataSource("mockDs") instanceof MockDataSource2); return row; @@ -259,6 +260,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { } public static class AddDynamicFieldTransformer extends Transformer { + @Override public Object transformRow(Map row, Context context) { // Add a dynamic field row.put("dynamic_s", "test"); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java index 2b7d3578e96..42b29610666 100644 --- 
a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java @@ -57,6 +57,7 @@ public class TestEntityProcessorBase extends AbstractDataImportHandlerTestCase { static class T1 extends Transformer { + @Override public Object transformRow(Map aRow, Context context) { aRow.put("T1", "T1 called"); return aRow; @@ -66,6 +67,7 @@ public class TestEntityProcessorBase extends AbstractDataImportHandlerTestCase { static class T2 extends Transformer { + @Override public Object transformRow(Map aRow, Context context) { aRow.put("T2", "T2 called"); return aRow; diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java index 0f703815bae..b8e285dffe1 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java @@ -78,19 +78,23 @@ public class TestErrorHandling extends AbstractDataImportHandlerTestCase { public static class StringDataSource extends DataSource { public static String xml = ""; + @Override public void init(Context context, Properties initProps) { } + @Override public Reader getData(String query) { return new StringReader(xml); } + @Override public void close() { } } public static class ExceptionTransformer extends Transformer { + @Override public Object transformRow(Map row, Context context) { throw new RuntimeException("Test exception"); } diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java index 18b30a36d7b..41ac1dc5d15 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java @@ -39,6 +39,7 @@ public class TestEvaluatorBag extends AbstractDataImportHandlerTestCase { Map urlTests; + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java index 96a145d6bcf..ac6626462ee 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java @@ -57,18 +57,20 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { String sysProp = System.getProperty("java.naming.factory.initial"); + @Override @Before public void setUp() throws Exception { super.setUp(); System.setProperty("java.naming.factory.initial", MockInitialContextFactory.class.getName()); - + mockControl = EasyMock.createStrictControl(); driver = mockControl.createMock(Driver.class); dataSource = mockControl.createMock(DataSource.class); connection = mockControl.createMock(Connection.class); } + @Override @After public void tearDown() throws Exception { if (sysProp == null) { @@ -77,6 +79,7 @@ public class 
TestJdbcDataSource extends AbstractDataImportHandlerTestCase { System.setProperty("java.naming.factory.initial", sysProp); } super.tearDown(); + mockControl.reset(); } @Test diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java index c24fced0bbf..91f8d034cda 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java @@ -207,12 +207,15 @@ public class TestLineEntityProcessor extends AbstractDataImportHandlerTestCase { private DataSource getDataSource(final String xml) { return new DataSource() { + @Override public void init(Context context, Properties initProps) { } + @Override public void close() { } + @Override public Reader getData(String query) { return new StringReader(xml); } diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java index 48a0b1b4214..e0a5b8bf39c 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java @@ -42,15 +42,18 @@ public class TestPlainTextEntityProcessor extends AbstractDataImportHandlerTestC public static class DS extends DataSource { static String s = "hello world"; + @Override public void init(Context context, Properties initProps) { } + @Override public Object getData(String query) { return new StringReader(s); } + @Override public void close() { } diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java index 7fc50fa11cd..0fbfb846eae 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java @@ -135,19 +135,23 @@ public class TestSqlEntityProcessor extends AbstractDataImportHandlerTestCase { private static DataSource>> getDs( final List> rows) { return new DataSource>>() { + @Override public Iterator> getData(String query) { return rows.iterator(); } + @Override public void init(Context context, Properties initProps) { } + @Override public void close() { } }; } public static class T extends Transformer { + @Override public Object transformRow(Map aRow, Context context) { aRow.put("T", "Class T"); return aRow; @@ -162,6 +166,7 @@ public class TestSqlEntityProcessor extends AbstractDataImportHandlerTestCase { } public static class T2 extends Transformer { + @Override public Object transformRow(Map aRow, Context context) { Integer count = local.get(); local.set(count + 1); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java index 966818b4ef5..6c0627e3e07 100644 --- 
a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java @@ -214,6 +214,7 @@ public class TestSqlEntityProcessor2 extends AbstractDataImportHandlerTestCase { } static class DateFormatValidatingEvaluator extends Evaluator { + @Override public String evaluate(String expression, Context context) { List l = EvaluatorBag.parseParams(expression, context.getVariableResolver()); Object o = l.get(0); diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java index 2befb0bdaa0..55c4b91ee34 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java @@ -169,6 +169,7 @@ public class TestVariableResolver extends AbstractDataImportHandlerTestCase { } public static class E extends Evaluator{ + @Override public String evaluate(String expression, Context context) { return "Hello World"; } diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java index 29672e5b258..596fa33b9d9 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java @@ -46,7 +46,7 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase tmpdir.delete(); tmpdir.mkdir(); tmpdir.deleteOnExit(); - createFile(tmpdir, "x.xsl", xsl.getBytes(), false); + createFile(tmpdir, "x.xsl", xsl.getBytes("UTF-8"), false); Map entityAttrs = createMap("name", "e", "url", "cd.xml", XPathEntityProcessor.FOR_EACH, "/catalog/cd"); List fields = new ArrayList(); @@ -211,7 +211,7 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase tmpdir.delete(); tmpdir.mkdir(); tmpdir.deleteOnExit(); - TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(), + TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes("UTF-8"), false); Map entityAttrs = createMap("name", "e", XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", "" @@ -236,12 +236,15 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase private DataSource getDataSource(final String xml) { return new DataSource() { + @Override public void init(Context context, Properties initProps) { } + @Override public void close() { } + @Override public Reader getData(String query) { return new StringReader(xml); } diff --git a/solr/contrib/extraction/CHANGES.txt b/solr/contrib/extraction/CHANGES.txt index 86d23bda34c..4309db21b21 100644 --- a/solr/contrib/extraction/CHANGES.txt +++ b/solr/contrib/extraction/CHANGES.txt @@ -24,9 +24,13 @@ Current Version: Tika 0.8 (released 11/07/2010) $Id:$ +================== Release 4.0-dev ================== + +(No Changes) + ================== Release 3.1-dev ================== -* Upgraded to Tika 0.8 and changed deprecated parse call +* SOLR-1902: Upgraded to Tika 0.8 and changed deprecated parse call * SOLR-1756: The 
date.format setting causes ClassCastException when enabled and the config code that parses this setting does not properly use the same iterator instance. (Christoph Brill, Mark Miller) diff --git a/solr/contrib/extraction/build.xml b/solr/contrib/extraction/build.xml index 73182c45264..de7542d54b4 100644 --- a/solr/contrib/extraction/build.xml +++ b/solr/contrib/extraction/build.xml @@ -40,7 +40,7 @@ - +
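The encoding-related test fixes above (TestDataConfig switching from a ByteArrayInputStream over xml.getBytes() to an InputSource wrapping a StringReader, and TestXPathEntityProcessor passing "UTF-8" to getBytes) share one goal: never depend on the platform default charset. A minimal standalone sketch of that pattern, with a hypothetical class name and sample XML that are not taken from the patch itself:

import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

public class CharsetSafeXmlParsing {
  public static void main(String[] args) throws Exception {
    String xml = "<dataConfig><document name=\"x\"/></dataConfig>";
    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    // Reader-based parsing: no byte decoding, so no dependence on the JVM default charset.
    Document doc = builder.parse(new InputSource(new StringReader(xml)));
    System.out.println(doc.getDocumentElement().getNodeName());
    // When bytes are unavoidable (e.g. writing a temp file), name the charset explicitly.
    byte[] bytes = xml.getBytes("UTF-8");
    System.out.println(bytes.length + " bytes as UTF-8");
  }
}

Parsing through a Reader avoids byte decoding entirely; where bytes are genuinely required, spelling out UTF-8 keeps test behavior identical across locales and JVMs.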
    diff --git a/solr/contrib/extraction/solr-cell-pom.xml.template b/solr/contrib/extraction/solr-cell-pom.xml.template deleted file mode 100644 index 2285353a2a7..00000000000 --- a/solr/contrib/extraction/solr-cell-pom.xml.template +++ /dev/null @@ -1,51 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @version@ - - - org.apache.solr - solr-cell - Apache Solr Content Extraction Library - @version@ - Apache Solr Content Extraction Library integrates Apache Tika content extraction framework into Solr - jar - - - - org.apache.tika - tika-core - 0.8-SNAPSHOT - - - org.apache.tika - tika-parsers - 0.8-SNAPSHOT - - - diff --git a/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java b/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java index a08967dfcc6..e7d03b9bb7c 100644 --- a/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java +++ b/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java @@ -120,6 +120,7 @@ public class ExtractingDocumentLoader extends ContentStreamLoader { * @param stream * @throws java.io.IOException */ + @Override public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws IOException { errHeader = "ExtractingDocumentLoader: " + stream.getSourceInfo(); Parser parser = null; diff --git a/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java b/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java index 943f0d849dc..d77a81491fd 100644 --- a/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java +++ b/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java @@ -111,6 +111,7 @@ public class ExtractingRequestHandler extends ContentStreamHandlerBase implement } + @Override protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) { return new ExtractingDocumentLoader(req, processor, config, factory); } diff --git a/solr/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java b/solr/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java index 1eac099acf0..441f6d3ce1a 100644 --- a/solr/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java +++ b/solr/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java @@ -46,6 +46,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 { initCore("solrconfig.xml", "schema.xml", "solr-extraction"); } + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml b/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml index 4d53a8f3391..8cc3aaaebe5 100644 --- a/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml +++ b/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml @@ -210,13 +210,14 @@ - + - + + diff --git a/solr/contrib/uima/CHANGES.txt b/solr/contrib/uima/CHANGES.txt new file mode 100644 index 00000000000..c0ca1a0960d --- /dev/null +++ b/solr/contrib/uima/CHANGES.txt @@ -0,0 +1,17 @@ +Apache Solr UIMA Metadata Extraction Library + Release Notes + +This file 
describes changes to the Solr UIMA (contrib/uima) module. See SOLR-2129 for details. + +Introduction +------------ +This module is intended to be used while indexing documents. +Its purpose is to provide additional on the fly automatically generated fields to the Solr index. +Such fields could be language, concepts, keywords, sentences, named entities, etc. + + UIMA Dependency + --------------- +uima-core, OpenCalaisAnnotator, WhitespaceTokenizer, HMMTagger, AlchemyAPIAnnotator +Current Version: 2.3.1-SNAPSHOT rev. 999276 + +$Id$ diff --git a/solr/contrib/uima/README.txt b/solr/contrib/uima/README.txt new file mode 100644 index 00000000000..b2b97293dac --- /dev/null +++ b/solr/contrib/uima/README.txt @@ -0,0 +1,60 @@ +Getting Started +--------------- +To start using Solr UIMA Metadata Extraction Library you should go through the following configuration steps: + +1. copy generated solr-uima jar and its libs (under contrib/uima/lib) inside a Solr libraries directory. + +2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options: + +3. for example you could specify the following + + + + +4. modify your solrconfig.xml adding the following snippet: + + + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_OPENCALAIS_KEY + + /org/apache/uima/desc/OverridingParamsExtServicesAE.xml + text + + + + + + + + + + + + + +5. the analysisEngine tag must contain an AE descriptor inside the specified path in the classpath + +6. the analyzeFields tag must contain the input fields that need to be analyzed by UIMA, + if merge=true then their content will be merged and analyzed only once + +7. field mapping describes which features of which types should go in a field + +8. define in your solrconfig.xml an UpdateRequestProcessorChain as following: + + + + + + +9. in your solrconfig.xml replace the existing default ( + + uima + + + +Once you're done with the configuration you can index documents which will be automatically enriched with the specified fields diff --git a/solr/contrib/uima/build.xml b/solr/contrib/uima/build.xml new file mode 100644 index 00000000000..34dbefec748 --- /dev/null +++ b/solr/contrib/uima/build.xml @@ -0,0 +1,189 @@ + + + + + + + + + + + + Solr Integration with UIMA for extracting metadata from arbitrary (text) fields and enrich document with features extracted from UIMA types (language, sentences, concepts, named entities, etc.) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tests failed! + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/uima/lib/commons-digester-2.0.jar b/solr/contrib/uima/lib/commons-digester-2.0.jar new file mode 100644 index 00000000000..bd9b6332cd4 --- /dev/null +++ b/solr/contrib/uima/lib/commons-digester-2.0.jar @@ -0,0 +1,2 @@ +AnyObjectId[9c8bd13a2002a9ff5b35b873b9f111d5281ad201] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/commons-lang-2.4.jar b/solr/contrib/uima/lib/commons-lang-2.4.jar new file mode 100644 index 00000000000..2ef0c625eb9 --- /dev/null +++ b/solr/contrib/uima/lib/commons-lang-2.4.jar @@ -0,0 +1,2 @@ +AnyObjectId[532939ecab6b77ccb77af3635c55ff9752b70ab7] was removed in git history. 
+Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/uima-an-alchemy.jar b/solr/contrib/uima/lib/uima-an-alchemy.jar new file mode 100644 index 00000000000..5ef77543e3e --- /dev/null +++ b/solr/contrib/uima/lib/uima-an-alchemy.jar @@ -0,0 +1,2 @@ +AnyObjectId[33165678da937e03cb069449b40f1cf690beda0a] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/uima-an-calais.jar b/solr/contrib/uima/lib/uima-an-calais.jar new file mode 100644 index 00000000000..bebd55ac233 --- /dev/null +++ b/solr/contrib/uima/lib/uima-an-calais.jar @@ -0,0 +1,2 @@ +AnyObjectId[5dfc32bce5e444a9bb3387d664485f7bfdc438ad] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/uima-an-tagger.jar b/solr/contrib/uima/lib/uima-an-tagger.jar new file mode 100644 index 00000000000..6e879bd9553 --- /dev/null +++ b/solr/contrib/uima/lib/uima-an-tagger.jar @@ -0,0 +1,2 @@ +AnyObjectId[bf90c19d2c1f77e300b94363385841ec1225b4b9] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/uima-an-wst.jar b/solr/contrib/uima/lib/uima-an-wst.jar new file mode 100644 index 00000000000..d0ce4c499c6 --- /dev/null +++ b/solr/contrib/uima/lib/uima-an-wst.jar @@ -0,0 +1,2 @@ +AnyObjectId[9518da64cdf5d378273ab40a06823a7768f18ece] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/lib/uima-core.jar b/solr/contrib/uima/lib/uima-core.jar new file mode 100644 index 00000000000..37d22bb6b65 --- /dev/null +++ b/solr/contrib/uima/lib/uima-core.jar @@ -0,0 +1,2 @@ +AnyObjectId[72991424bdfe4776f66feab7ff4e8564f12d2659] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java new file mode 100644 index 00000000000..2ba2d7f4fc5 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java @@ -0,0 +1,69 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Map; + +/** + * Configuration holding all the configurable parameters for calling UIMA inside Solr + * + * @version $Id$ + */ +public class SolrUIMAConfiguration { + + private String[] fieldsToAnalyze; + + private boolean fieldsMerging; + + private Map> typesFeaturesFieldsMapping; + + private String aePath; + + private Map runtimeParameters; + + public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging, + Map> typesFeaturesFieldsMapping, + Map runtimeParameters) { + this.aePath = aePath; + this.fieldsToAnalyze = fieldsToAnalyze; + this.fieldsMerging = fieldsMerging; + this.runtimeParameters = runtimeParameters; + this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping; + } + + public String[] getFieldsToAnalyze() { + return fieldsToAnalyze; + } + + public boolean isFieldsMerging() { + return fieldsMerging; + } + + public Map> getTypesFeaturesFieldsMapping() { + return typesFeaturesFieldsMapping; + } + + public String getAePath() { + return aePath; + } + + public Map getRuntimeParameters() { + return runtimeParameters; + } + +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java new file mode 100644 index 00000000000..4ffeb83fa56 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java @@ -0,0 +1,125 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.core.SolrConfig; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Read configuration for Solr-UIMA integration + * + * @version $Id$ + * + */ +public class SolrUIMAConfigurationReader { + + private static final String AE_RUNTIME_PARAMETERS_NODE_PATH = "/config/uimaConfig/runtimeParameters"; + + private static final String FIELD_MAPPING_NODE_PATH = "/config/uimaConfig/fieldMapping"; + + private static final String ANALYZE_FIELDS_NODE_PATH = "/config/uimaConfig/analyzeFields"; + + private static final String ANALYSIS_ENGINE_NODE_PATH = "/config/uimaConfig/analysisEngine"; + + private SolrConfig solrConfig; + + public SolrUIMAConfigurationReader(SolrConfig solrConfig) { + this.solrConfig = solrConfig; + } + + public SolrUIMAConfiguration readSolrUIMAConfiguration() { + return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(), + readTypesFeaturesFieldsMapping(), readAEOverridingParameters()); + } + + private String readAEPath() { + return solrConfig.getNode(ANALYSIS_ENGINE_NODE_PATH, true).getTextContent(); + } + + private String[] readFieldsToAnalyze() { + Node analyzeFieldsNode = solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true); + return analyzeFieldsNode.getTextContent().split(","); + } + + private boolean readFieldsMerging() { + Node analyzeFieldsNode = solrConfig.getNode(ANALYZE_FIELDS_NODE_PATH, true); + Node mergeNode = analyzeFieldsNode.getAttributes().getNamedItem("merge"); + return Boolean.valueOf(mergeNode.getNodeValue()); + } + + private Map> readTypesFeaturesFieldsMapping() { + Map> map = new HashMap>(); + + Node fieldMappingNode = solrConfig.getNode(FIELD_MAPPING_NODE_PATH, true); + /* iterate over UIMA types */ + if (fieldMappingNode.hasChildNodes()) { + NodeList typeNodes = fieldMappingNode.getChildNodes(); + for (int i = 0; i < typeNodes.getLength(); i++) { + /* node */ + Node typeNode = typeNodes.item(i); + if (typeNode.getNodeType() != Node.TEXT_NODE) { + Node typeNameAttribute = typeNode.getAttributes().getNamedItem("name"); + /* get a UIMA typename */ + String typeName = typeNameAttribute.getNodeValue(); + /* create entry for UIMA type */ + map.put(typeName, new HashMap()); + if (typeNode.hasChildNodes()) { + /* iterate over features */ + NodeList featuresNodeList = typeNode.getChildNodes(); + for (int j = 0; j < featuresNodeList.getLength(); j++) { + Node mappingNode = featuresNodeList.item(j); + if (mappingNode.getNodeType() != Node.TEXT_NODE) { + /* get field name */ + Node fieldNameNode = mappingNode.getAttributes().getNamedItem("field"); + String mappedFieldName = fieldNameNode.getNodeValue(); + /* get feature name */ + Node featureNameNode = mappingNode.getAttributes().getNamedItem("feature"); + String featureName = featureNameNode.getNodeValue(); + /* map the feature to the field for the specified type */ + map.get(typeName).put(featureName, mappedFieldName); + } + } + } + } + } + } + return map; + } + + private Map readAEOverridingParameters() { + Map runtimeParameters = new HashMap(); + Node uimaConfigNode = solrConfig.getNode(AE_RUNTIME_PARAMETERS_NODE_PATH, true); + + if (uimaConfigNode.hasChildNodes()) { + NodeList overridingNodes = uimaConfigNode.getChildNodes(); + for (int i = 0; i < overridingNodes.getLength(); i++) { + Node overridingNode = overridingNodes.item(i); + if (overridingNode.getNodeType() != Node.TEXT_NODE) { + runtimeParameters.put(overridingNode.getNodeName(), overridingNode.getTextContent()); + } + } + } + + 
return runtimeParameters; + } + +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java new file mode 100644 index 00000000000..29e7b5c2926 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java @@ -0,0 +1,83 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Map; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Map UIMA types and features over fields of a Solr document + * + * @version $Id$ + */ +public class UIMAToSolrMapper { + + private final Logger log = LoggerFactory.getLogger(UIMAToSolrMapper.class); + + private SolrInputDocument document; + + private JCas cas; + + public UIMAToSolrMapper(SolrInputDocument document, JCas cas) { + this.document = document; + this.cas = cas; + } + + /** + * map features of a certain UIMA type to corresponding Solr fields based on the mapping + * + * @param typeName + * name of UIMA type to map + * @param featureFieldsmapping + */ + public void map(String typeName, Map featureFieldsmapping) { + try { + FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor( + JCas.class).newInstance(cas); + Type type = fsMock.getType(); + for (FSIterator iterator = cas.getFSIndexRepository().getAllIndexedFS(type); iterator + .hasNext();) { + FeatureStructure fs = iterator.next(); + for (String featureName : featureFieldsmapping.keySet()) { + String fieldName = featureFieldsmapping.get(featureName); + log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName) + .append(" to ").append(fieldName).toString()); + String featureValue = null; + if (fs instanceof Annotation && "coveredText".equals(featureName)) { + featureValue = ((Annotation) fs).getCoveredText(); + } else { + featureValue = fs.getFeatureValueAsString(type.getFeatureByBaseName(featureName)); + } + log.info(new StringBuffer("writing ").append(featureValue).append(" in ").append( + fieldName).toString()); + document.addField(fieldName, featureValue, 1.0f); + } + } + } catch (Exception e) { + log.error(e.getLocalizedMessage()); + } + } +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java new file mode 100644 index 00000000000..4f7e004666c --- 
/dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -0,0 +1,127 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.core.SolrCore; +import org.apache.solr.uima.processor.ae.AEProvider; +import org.apache.solr.uima.processor.ae.AEProviderFactory; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.uima.UIMAException; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; + +/** + * Update document(s) to be indexed with UIMA extracted information + * + * @version $Id$ + */ +public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { + + private SolrUIMAConfiguration solrUIMAConfiguration; + + private AEProvider aeProvider; + + public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore) { + super(next); + initialize(solrCore); + } + + private void initialize(SolrCore solrCore) { + SolrUIMAConfigurationReader uimaConfigurationReader = new SolrUIMAConfigurationReader(solrCore + .getSolrConfig()); + solrUIMAConfiguration = uimaConfigurationReader.readSolrUIMAConfiguration(); + aeProvider = AEProviderFactory.getInstance().getAEProvider(solrCore.getName(), + solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters()); + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + try { + /* get Solr document */ + SolrInputDocument solrInputDocument = cmd.getSolrInputDocument(); + + /* get the fields to analyze */ + for (String text : getTextsToAnalyze(solrInputDocument)) { + if (text != null && !"".equals(text)) { + /* process the text value */ + JCas jcas = processText(text); + + UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas); + /* get field mapping from config */ + Map> typesAndFeaturesFieldsMap = solrUIMAConfiguration + .getTypesFeaturesFieldsMapping(); + /* map type features on fields */ + for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) { + uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN)); + } + } + } + } catch (UIMAException e) { + throw new RuntimeException(e); + } + super.processAdd(cmd); + } + + /* + * get the texts to analyze from the corresponding fields + */ + private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) { + String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze(); + 
boolean merge = solrUIMAConfiguration.isFieldsMerging(); + String[] textVals = null; + if (merge) { + StringBuilder unifiedText = new StringBuilder(""); + for (int i = 0; i < fieldsToAnalyze.length; i++) { + unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]))); + } + textVals = new String[1]; + textVals[0] = unifiedText.toString(); + } else { + textVals = new String[fieldsToAnalyze.length]; + for (int i = 0; i < fieldsToAnalyze.length; i++) { + textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])); + } + } + return textVals; + } + + /* process a field value executing UIMA the CAS containing it as document text */ + private JCas processText(String textFieldValue) throws ResourceInitializationException, + AnalysisEngineProcessException { + log.info(new StringBuffer("Analazying text").toString()); + /* get the UIMA analysis engine */ + AnalysisEngine ae = aeProvider.getAE(); + + /* create a JCas which contain the text to analyze */ + JCas jcas = ae.newJCas(); + jcas.setDocumentText(textFieldValue); + + /* perform analysis on text field */ + ae.process(jcas); + log.info(new StringBuilder("Text processing completed").toString()); + return jcas; + } + +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java new file mode 100644 index 00000000000..b8167572195 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java @@ -0,0 +1,38 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorFactory; + +/** + * Factory for {@link UIMAUpdateRequestProcessor} + * + * @version $Id$ + */ +public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new UIMAUpdateRequestProcessor(next, req.getCore()); + } + +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java new file mode 100644 index 00000000000..89c981ab6e3 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java @@ -0,0 +1,32 @@ +package org.apache.solr.uima.processor.ae; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.resource.ResourceInitializationException; + +/** + * provide an Apache UIMA {@link AnalysisEngine} + * + * @version $Id$ + */ +public interface AEProvider { + + public AnalysisEngine getAE() throws ResourceInitializationException; + +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java new file mode 100644 index 00000000000..2104e753353 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java @@ -0,0 +1,53 @@ +package org.apache.solr.uima.processor.ae; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.HashMap; +import java.util.Map; + +/** + * Singleton factory class responsible of {@link AEProvider}s' creation + * + * @version $Id$ + */ +public class AEProviderFactory { + + private static AEProviderFactory instance; + + private Map providerCache = new HashMap(); + + private AEProviderFactory() { + // Singleton + } + + public static AEProviderFactory getInstance() { + if (instance == null) { + instance = new AEProviderFactory(); + } + return instance; + } + + public synchronized AEProvider getAEProvider(String core, String aePath, + Map runtimeParameters) { + String key = new StringBuilder(core).append(aePath).toString(); + if (providerCache.get(key) == null) { + providerCache.put(key, new OverridingParamsAEProvider(aePath, runtimeParameters)); + } + return providerCache.get(key); + } +} diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java new file mode 100644 index 00000000000..d4d74910379 --- /dev/null +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java @@ -0,0 +1,89 @@ +package org.apache.solr.uima.processor.ae; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.net.URL; +import java.util.Map; + +import org.apache.uima.UIMAFramework; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.XMLInputSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * {@link AEProvider} implementation that creates an Aggregate AE from the given path, also + * injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning + * them as overriding parameters in the aggregate AE + * + * @version $Id$ + */ +public class OverridingParamsAEProvider implements AEProvider { + + private static Logger log = LoggerFactory.getLogger(OverridingParamsAEProvider.class); + + private String aeFilePath; + + private AnalysisEngine cachedAE; + + private Map runtimeParameters; + + public OverridingParamsAEProvider(String aeFilePath, Map runtimeParameters) { + this.aeFilePath = aeFilePath; + this.runtimeParameters = runtimeParameters; + } + + public synchronized AnalysisEngine getAE() throws ResourceInitializationException { + try { + if (cachedAE == null) { + // get Resource Specifier from XML file + URL url = this.getClass().getResource(aeFilePath); + XMLInputSource in = new XMLInputSource(url); + + // get AE description + AnalysisEngineDescription desc = UIMAFramework.getXMLParser() + .parseAnalysisEngineDescription(in); + + /* iterate over each AE (to set runtime parameters) */ + for (String attributeName : runtimeParameters.keySet()) { + desc.getAnalysisEngineMetaData().getConfigurationParameterSettings().setParameterValue( + attributeName, runtimeParameters.get(attributeName)); + log.info(new StringBuilder("setting ").append(attributeName).append(" : ").append( + runtimeParameters.get(attributeName)).toString()); + } + // create AE here + cachedAE = UIMAFramework.produceAnalysisEngine(desc); + if (log.isDebugEnabled()) + log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName()) + .append(" created from descriptor ").append(aeFilePath).toString()); + } else { + cachedAE.reconfigure(); + if (log.isDebugEnabled()) + log.debug(new StringBuilder("AE ").append(cachedAE.getAnalysisEngineMetaData().getName()) + .append(" at path ").append(aeFilePath).append(" reconfigured ").toString()); + } + } catch (Exception e) { + cachedAE = null; + throw new ResourceInitializationException(e); + } + return cachedAE; + } + +} \ No newline at end of file diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/AggregateSentenceAE.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/AggregateSentenceAE.xml new file mode 100644 index 00000000000..75ae50e500e --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/AggregateSentenceAE.xml @@ -0,0 +1,41 @@ + + + org.apache.uima.java + false + + + + + + + + + + AggregateSentenceAE + + 1.0 + + + + + + WhitespaceTokenizer + HmmTagger + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/ExtServicesAE.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/ExtServicesAE.xml new file mode 100644 index 00000000000..ef5268fd592 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/ExtServicesAE.xml @@ -0,0 +1,57 @@ + + + org.apache.uima.java + false + + + + + + + + + + + + + + + + + + + + + + ExtServicesAE + + 1.0 + + + + + + OpenCalaisAnnotator + 
TextKeywordExtractionAEDescriptor + TextLanguageDetectionAEDescriptor + TextCategorizationAEDescriptor + TextConceptTaggingAEDescriptor + TextRankedEntityExtractionAEDescriptor + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/HmmTagger.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/HmmTagger.xml new file mode 100644 index 00000000000..8fe4216d91a --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/HmmTagger.xml @@ -0,0 +1,121 @@ + + + + org.apache.uima.java + true + org.apache.uima.examples.tagger.HMMTagger + + Hidden Markov Model - Part of Speech Tagger + A configuration of the HmmTaggerAnnotator that looks for + parts of speech of identified tokens within existing + Sentence and Token annotations. See also + WhitespaceTokenizer.xml. + 1.0 + The Apache Software Foundation + + + NGRAM_SIZE + Integer + false + true + + + + + NGRAM_SIZE + + 3 + + + + + + + org.apache.uima.TokenAnnotation + Single token annotation + uima.tcas.Annotation + + + posTag + contains part-of-speech of a + corresponding token + uima.cas.String + + + + + org.apache.uima.SentenceAnnotation + sentence annotation + uima.tcas.Annotation + + + + + + + + + org.apache.uima.TokenAnnotation + org.apache.uima.SentenceAnnotation + org.apache.uima.TokenAnnotation:end + org.apache.uima.TokenAnnotation:begin + + + org.apache.uima.TokenAnnotation + org.apache.uima.TokenAnnotation:posTag + org.apache.uima.TokenAnnotation:end + org.apache.uima.TokenAnnotation:begin + + + + + + true + true + false + + + + + Model + HMM Tagger model file + org.apache.uima.examples.tagger.IModelResource + false + + + + + + ModelFile + HMM Tagger model file + + file:english/BrownModel.dat + + org.apache.uima.examples.tagger.ModelResource + + + + + Model + ModelFile + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OpenCalaisAnnotator.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OpenCalaisAnnotator.xml new file mode 100644 index 00000000000..e7b0c07dcd4 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OpenCalaisAnnotator.xml @@ -0,0 +1,194 @@ + + + org.apache.uima.java + true + org.apache.uima.annotator.calais.OpenCalaisAnnotator + + OpenCalaisAnnotator + + + + allowDistribution + + Boolean + false + true + + + allowSearch + + Boolean + false + true + + + submitter + + String + false + true + + + licenseID + + String + false + true + + + + + allowDistribution + + false + + + + allowSearch + + false + + + + submitter + + + + + + licenseID + + OC_LICENSE_ID + + + + + + + org.apache.uima.calais.Person + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Anniversary + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.City + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Company + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Continent + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Country + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Currency + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.EmailAddress + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Facility + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.FaxNumber + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Holiday + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.IndustryTerm + + org.apache.uima.calais.BaseType + + + 
org.apache.uima.calais.NaturalDisaster + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.NaturalFeature + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Organization + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.PhoneNumber + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.ProviceOrState + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Region + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.Technology + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.URL + + org.apache.uima.calais.BaseType + + + org.apache.uima.calais.BaseType + + uima.tcas.Annotation + + + calaisType + OpenCalais type + uima.cas.String + + + + + + + + + + + + + + true + true + false + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OverridingParamsExtServicesAE.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OverridingParamsExtServicesAE.xml new file mode 100644 index 00000000000..81bd4029016 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/OverridingParamsExtServicesAE.xml @@ -0,0 +1,147 @@ + + + org.apache.uima.java + false + + + + + + + + + + + + + + + + + + + + + + + + + ExtServicesAE + + 1.0 + + + + oc_licenseID + String + false + true + + OpenCalaisAnnotator/licenseID + + + + keyword_apikey + String + false + true + + TextKeywordExtractionAEDescriptor/apikey + + + + concept_apikey + String + false + true + + TextConceptTaggingAEDescriptor/apikey + + + + lang_apikey + String + false + true + + TextLanguageDetectionAEDescriptor/apikey + + + + cat_apikey + String + false + true + + TextCategorizationAEDescriptor/apikey + + + + entities_apikey + String + false + true + + TextRankedEntityExtractionAEDescriptor/apikey + + + + + + oc_licenseID + + licenseid + + + + keyword_apikey + + apikey + + + + concept_apikey + + apikey + + + + lang_apikey + + apikey + + + + cat_apikey + + apikey + + + + + + AggregateSentenceAE + OpenCalaisAnnotator + TextKeywordExtractionAEDescriptor + TextLanguageDetectionAEDescriptor + TextCategorizationAEDescriptor + TextConceptTaggingAEDescriptor + TextRankedEntityExtractionAEDescriptor + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextCategorizationAEDescriptor.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextCategorizationAEDescriptor.xml new file mode 100644 index 00000000000..16aff2b7775 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextCategorizationAEDescriptor.xml @@ -0,0 +1,102 @@ + + + + org.apache.uima.java + true + org.apache.uima.alchemy.annotator.TextCategorizationAnnotator + + TextCategorizationAEDescriptor + + 1.0 + + + + apikey + String + false + true + + + outputMode + String + false + true + + + baseUrl + String + false + false + + + + + outputMode + + xml + + + + apikey + + AA_API_KEY + + + + + + + org.apache.uima.alchemy.ts.categorization.Category + + uima.cas.TOP + + + score + + uima.cas.String + + + text + + uima.cas.String + + + + + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextConceptTaggingAEDescriptor.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextConceptTaggingAEDescriptor.xml new file mode 100644 index 00000000000..ee9166c3a2e --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextConceptTaggingAEDescriptor.xml @@ -0,0 +1,196 @@ + + + + 
org.apache.uima.java + true + org.apache.uima.alchemy.annotator.TextConceptTaggingAnnotator + + TextConceptTaggingAEDescriptor + + 1.0 + + + + apikey + String + false + true + + + outputMode + String + false + true + + + linkedData + String + false + false + + + showSourceText + Integer + false + true + + + maxRetrieve + String + false + false + + + url + String + false + false + + + + + apikey + + + + + + outputMode + + xml + + + + linkedData + + 1 + + + + showSourceText + + 0 + + + + maxRetrieve + + 8 + + + + + + + org.apache.uima.alchemy.ts.concept.ConceptFS + a concept tag + uima.cas.TOP + + + text + + uima.cas.String + + + relevance + + uima.cas.String + + + website + + uima.cas.String + + + geo + + uima.cas.String + + + dbpedia + + uima.cas.String + + + yago + + uima.cas.String + + + opencyc + + uima.cas.String + + + freebase + + uima.cas.String + + + ciaFactbook + + uima.cas.String + + + census + + uima.cas.String + + + geonames + + uima.cas.String + + + musicBrainz + + uima.cas.String + + + crunchbase + + uima.cas.String + + + semanticCrunchbase + + uima.cas.String + + + + + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextKeywordExtractionAEDescriptor.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextKeywordExtractionAEDescriptor.xml new file mode 100644 index 00000000000..af6a5127a75 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextKeywordExtractionAEDescriptor.xml @@ -0,0 +1,107 @@ + + + org.apache.uima.java + true + org.apache.uima.alchemy.annotator.TextKeywordExtractionAnnotator + + TextKeywordExtractionAEDescriptor + + 1.0 + + + + apikey + String + false + true + + + outputMode + String + false + true + + + baseUrl + String + false + false + + + url + String + false + false + + + maxRetrieve + Integer + false + false + + + showSourceText + Integer + false + false + + + + + outputMode + + xml + + + + apikey + + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + + + + maxRetrieve + + 10 + + + + showSourceText + + 0 + + + + + + + org.apache.uima.alchemy.ts.keywords.KeywordFS + + uima.cas.TOP + + + text + + uima.cas.String + + + + + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextLanguageDetectionAEDescriptor.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextLanguageDetectionAEDescriptor.xml new file mode 100644 index 00000000000..6f9fb982ab8 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextLanguageDetectionAEDescriptor.xml @@ -0,0 +1,107 @@ + + + org.apache.uima.java + true + org.apache.uima.alchemy.annotator.TextLanguageDetectionAnnotator + + TextLanguageDetectionAEDescriptor + + 1.0 + + + + apikey + String + false + true + + + outputMode + String + false + true + + + url + String + false + false + + + + + outputMode + + xml + + + + apikey + + AA_API_KEY + + + + + + + org.apache.uima.alchemy.ts.language.LanguageFS + + uima.cas.TOP + + + language + + uima.cas.String + + + iso6391 + + uima.cas.String + + + iso6392 + + uima.cas.String + + + iso6393 + + uima.cas.String + + + ethnologue + + uima.cas.String + + + nativeSpeakers + + uima.cas.String + + + wikipedia + + uima.cas.String + + + + + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextRankedEntityExtractionAEDescriptor.xml 
b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextRankedEntityExtractionAEDescriptor.xml new file mode 100644 index 00000000000..410d6c9f825 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/TextRankedEntityExtractionAEDescriptor.xml @@ -0,0 +1,403 @@ + + + + org.apache.uima.java + true + org.apache.uima.alchemy.annotator.TextRankedNamedEntityExtractionAnnotator + + TextRankedEntityExtractionAEDescriptor + + 1.0 + + + + apikey + String + false + true + + + outputMode + String + false + true + + + disambiguate + Integer + false + true + + + linkedData + String + false + false + + + showSourceText + Integer + false + true + + + baseUrl + String + false + false + + + url + String + false + false + + + coreference + String + false + false + + + quotations + String + false + false + + + + + apikey + + + + + + outputMode + + xml + + + + disambiguate + + 1 + + + + linkedData + + 1 + + + + coreference + + 1 + + + + showSourceText + + 0 + + + + quotations + + 1 + + + + + + + + + + org.apache.uima.alchemy.ts.entity.Anniversary + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Automobile + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.City + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Company + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Continent + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Country + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.EntertainmentAward + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Facility + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.FieldTerminology + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.FinancialMarketIndex + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.GeographicFeature + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.HealthCondition + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Holiday + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Movie + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.MusicGroup + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.NaturalDisaster + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Organization + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Person + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.PrintMedia + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.RadioProgram + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.RadioStation + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Region + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Sport + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.StateOrCounty + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Technology + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + 
org.apache.uima.alchemy.ts.entity.TelevisionShow + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.TelevisionStation + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.OperatingSystem + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.SportingEvent + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.Drug + + org.apache.uima.alchemy.ts.entity.BaseEntity + + + org.apache.uima.alchemy.ts.entity.BaseEntity + + uima.cas.TOP + + + text + + uima.cas.String + + + count + + uima.cas.String + + + relevance + + uima.cas.String + + + disambiguation + + uima.cas.String + + + subType + + uima.cas.String + + + website + + uima.cas.String + + + geo + + uima.cas.String + + + dbpedia + + uima.cas.String + + + yago + + uima.cas.String + + + opencyc + + uima.cas.String + + + umbel + + uima.cas.String + + + freebase + + uima.cas.String + + + ciaFactbook + + uima.cas.String + + + census + + uima.cas.String + + + geonames + + uima.cas.String + + + musicBrainz + + uima.cas.String + + + quotations + + uima.cas.StringArray + true + + + occurrences + A list of annotations annotating this entity + uima.cas.FSList + uima.tcas.Annotation + + + + + + + + + + + + + + + + true + true + false + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/WhitespaceTokenizer.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/WhitespaceTokenizer.xml new file mode 100644 index 00000000000..686dbefc5d3 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/WhitespaceTokenizer.xml @@ -0,0 +1,115 @@ + + + + + + + org.apache.uima.java + + true + + org.apache.uima.annotator.WhitespaceTokenizer + + + + WhitespaceTokenizer + + creates token and sentence annotations for whitespace + separated languages + + 1.0 + The Apache Software Foundation + + + + SofaNames + + The Sofa names the annotator should work on. If no + names are specified, the annotator works on the + default sofa. 
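The descriptors above only declare which UIMA types and features the analysis engines emit; it is UIMAToSolrMapper (added earlier in this patch) that copies those features onto the Solr document. A minimal sketch of that step, not part of this patch, assuming "jcas" is a CAS already processed by the aggregate engine and "solrInputDocument" is the document being indexed; the field name "language" is the one queried by the tests:

    // hypothetical mapping: LanguageFS "language" feature -> Solr field "language"
    Map<String, String> languageMapping = new HashMap<String, String>();
    languageMapping.put("language", "language");

    // iterates all indexed LanguageFS instances in the CAS and adds one Solr field per mapped feature
    UIMAToSolrMapper mapper = new UIMAToSolrMapper(solrInputDocument, jcas);
    mapper.map("org.apache.uima.alchemy.ts.language.LanguageFS", languageMapping);
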
+ + String + true + false + + + + + + + + + + + org.apache.uima.TokenAnnotation + Single token annotation + uima.tcas.Annotation + + + tokenType + token type + uima.cas.String + + + + + + org.apache.uima.SentenceAnnotation + sentence annotation + uima.tcas.Annotation + + + + + + + + + + + + + org.apache.uima.TokenAnnotation + + org.apache.uima.TokenAnnotation:tokentype + + org.apache.uima.SentenceAnnotation + + + x-unspecified + + + + + + + diff --git a/solr/contrib/uima/src/main/resources/org/apache/uima/desc/baseAlchemyTypeSystemDescriptor.xml b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/baseAlchemyTypeSystemDescriptor.xml new file mode 100644 index 00000000000..32d5d843a48 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/org/apache/uima/desc/baseAlchemyTypeSystemDescriptor.xml @@ -0,0 +1,41 @@ + + + + baseAlchemyTypeSystemDescriptor + + 1.0 + + + + org.apache.uima.alchemy.ts.entity.AlchemyAnnotation + + uima.tcas.Annotation + + + alchemyType + alchemyAPI type + uima.cas.String + + + + + diff --git a/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml b/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml new file mode 100644 index 00000000000..0e66585bf80 --- /dev/null +++ b/solr/contrib/uima/src/main/resources/solr/conf/aggregate-uima-config.xml @@ -0,0 +1,33 @@ + + + + + + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_ALCHEMYAPI_KEY + VALID_OPENCALAIS_KEY + + /org/apache/uima/desc/OverridingParamsExtServicesAE.xml + text,title + + + + + + \ No newline at end of file diff --git a/solr/contrib/uima/src/main/resources/solr/conf/uima-fields.xml b/solr/contrib/uima/src/main/resources/solr/conf/uima-fields.xml new file mode 100644 index 00000000000..270aaa6f62e --- /dev/null +++ b/solr/contrib/uima/src/main/resources/solr/conf/uima-fields.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java new file mode 100644 index 00000000000..b0499538d6e --- /dev/null +++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java @@ -0,0 +1,138 @@ +package org.apache.solr.uima.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.UpdateParams; +import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.ContentStreamBase; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.XmlUpdateRequestHandler; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.processor.UpdateRequestProcessorChain; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * TestCase for {@link UIMAUpdateRequestProcessor} + * + * @version $Id$ + */ +public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema.xml", "solr-uima"); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + } + + @Test + public void testProcessorConfiguration() { + SolrCore core = h.getCore(); + UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima"); + assertNotNull(chained); + UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory) chained + .getFactories()[0]; + assertNotNull(factory); + } + + @Test + public void testProcessing() throws Exception { + // this test requires an internet connection (e.g. opencalais api) + checkInternetConnection(); + + addDoc(adoc( + "id", + "2312312321312", + "text", + "SpellCheckComponent got improvement related to recent Lucene changes. \n " + + "Add support for specifying Spelling SuggestWord Comparator to Lucene spell " + + "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is" + + " attached if you need it, but it is also committed to trunk and 3_x branch." + + " Last Lucene European Conference has been held in Prague.")); + assertU(commit()); + assertQ(req("language:english"), "//*[@numFound='1']"); + } + + @Test + public void testTwoUpdates() { + // this test requires an internet connection (e.g. 
opencalais api) + checkInternetConnection(); + + try { + addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce " + + "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern " + + "Hemisphere!")); + assertU(commit()); + assertQ(req("language:english"), "//*[@numFound='1']"); + + addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University " + + "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be" + + " attendee-driven, facilitated by members of the Apache community and will " + + "focus on the Apache...")); + assertU(commit()); + assertQ(req("language:english"), "//*[@numFound='2']"); + + } catch (Exception e) { + assumeNoException("Multiple updates on same instance didn't work", e); + } + } + + private void addDoc(String doc) throws Exception { + Map params = new HashMap(); + params.put(UpdateParams.UPDATE_PROCESSOR, new String[] { "uima" }); + MultiMapSolrParams mmparams = new MultiMapSolrParams(params); + SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) { + }; + + XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler(); + handler.init(null); + ArrayList streams = new ArrayList(2); + streams.add(new ContentStreamBase.StringStream(doc)); + req.setContentStreams(streams); + handler.handleRequestBody(req, new SolrQueryResponse()); + } + + private void checkInternetConnection() { + try { + URLConnection conn = new URL("http://www.apache.org/").openConnection(); + conn.setConnectTimeout(5000); + conn.setReadTimeout(5000); + conn.connect(); + } catch (Exception ex) { + assumeNoException("This test requires an internet connection", ex); + } + } +} diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/protwords.txt b/solr/contrib/uima/src/test/resources/solr-uima/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+dontstems +zwhacky + diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml b/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml new file mode 100644 index 00000000000..ff447a97f2b --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml @@ -0,0 +1,679 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml b/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml new file mode 100644 index 00000000000..173505fa67f --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml @@ -0,0 +1,1108 @@ + + + + + + + LUCENE_40 + ${solr.abortOnConfigurationError:true} + + + + + + + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + + + false + + 10 + + + + + 32 + + 10000 + 1000 + 10000 + + + + + + + + + + + + + native + + + + + + + false + 32 + 10 + + + + + + + + false + + + true + + + + + + + + 1 + + 0 + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + true + + + + + + + + 20 + + + 200 + + + + + + + + + + + + + + solr rocks + 0 + 10 + + + static firstSearcher warming query from + solrconfig.xml + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + + + + + + + + + + + + + dismax + explicit + 0.01 + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 + manu^1.1 cat^1.4 + + + text^0.2 features^1.1 name^1.5 manu^1.4 + manu_exact^1.9 + + + popularity^0.5 recip(price,1,1000,1000)^0.3 + + + id,name,price,score + + + 2<-1 5<-2 6<90% + 100 + *:* + + text features name + + 0 + + name + regex + + + + + + + dismax + explicit + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 + 2<-1 5<-2 6<90% + + incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 + + + + inStock:true + + + + cat + manu_exact + price:[* TO 500] + price:[500 TO *] + + + + + + + + + + textSpell + + + default + name + ./spellchecker + + + + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + + + + true + + + tvComponent + + + + + + + + + default + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + 20 + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + true + default + true + + name + id + + features + + true + + + + false + + + clusteringComponent + + + + + + + + text + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + true + + + termsComponent + + + + + + + + + + uima + + + + + + + + + + + + + + + + + + + + + + + + + + standard + solrpingquery + all + + + + + + + explicit + true + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + 5 + + + + + + + + + + * + + + + + + + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + 04490000a72fe7ec5cb3497f14e77f338c86f2fe + g6h9zamsdtwhb93nc247ecrs + + 
/org/apache/uima/desc/OverridingParamsExtServicesAE.xml + text + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/spellings.txt b/solr/contrib/uima/src/test/resources/solr-uima/conf/spellings.txt new file mode 100644 index 00000000000..162a044d561 --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/spellings.txt @@ -0,0 +1,2 @@ +pizza +history diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/stopwords.txt b/solr/contrib/uima/src/test/resources/solr-uima/conf/stopwords.txt new file mode 100644 index 00000000000..b5824da3263 --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/synonyms.txt b/solr/contrib/uima/src/test/resources/solr-uima/conf/synonyms.txt new file mode 100644 index 00000000000..b0e31cb7ec8 --- /dev/null +++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
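The descriptor path and API keys configured above are resolved at indexing time by AEProviderFactory and OverridingParamsAEProvider. A minimal sketch of the same resolution done outside the update chain, assuming the descriptor is on the classpath; the core name and key values are placeholders and exception handling is omitted:

    // runtime parameters override the matching <overrides> entries of the aggregate descriptor
    Map<String, Object> runtimeParameters = new HashMap<String, Object>();
    runtimeParameters.put("oc_licenseID", "VALID_OPENCALAIS_KEY");    // placeholder key
    runtimeParameters.put("keyword_apikey", "VALID_ALCHEMYAPI_KEY");  // placeholder key

    // the provider is cached per core + descriptor path; the AE is built once, then reconfigured
    AEProvider provider = AEProviderFactory.getInstance().getAEProvider(
        "collection1", "/org/apache/uima/desc/OverridingParamsExtServicesAE.xml", runtimeParameters);
    AnalysisEngine ae = provider.getAE();

    // same flow UIMAUpdateRequestProcessor.processText() runs for each field value
    JCas jcas = ae.newJCas();
    jcas.setDocumentText("Text to analyze");
    ae.process(jcas);
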
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/example/example-DIH/solr/db/conf/solrconfig.xml b/solr/example/example-DIH/solr/db/conf/solrconfig.xml index e060e5788dc..e46d8fe299d 100644 --- a/solr/example/example-DIH/solr/db/conf/solrconfig.xml +++ b/solr/example/example-DIH/solr/db/conf/solrconfig.xml @@ -17,18 +17,17 @@ --> + + + LUCENE_40 - - ${solr.abortOnConfigurationError:true} - false @@ -350,112 +349,6 @@ - - - - - explicit - 0.01 - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 - - - ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3 - - - id,name,price,score - - - 2<-1 5<-2 6<90% - - 100 - *:* - - text features name - - 0 - - name - regex - - - - - - - explicit - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 - 2<-1 5<-2 6<90% - - incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 - - - - inStock:true - - - - cat - manu_exact - price:[* TO 500] - price:[500 TO *] - - - - - - - inStock:true - - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - 2<-1 5<-2 6<90% - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/solrconfig.xml b/solr/example/example-DIH/solr/mail/conf/solrconfig.xml index c0cbc66cf9d..19a4ebac562 100644 --- a/solr/example/example-DIH/solr/mail/conf/solrconfig.xml +++ b/solr/example/example-DIH/solr/mail/conf/solrconfig.xml @@ -17,14 +17,14 @@ --> - - ${solr.abortOnConfigurationError:true} + + + LUCENE_40 @@ -655,12 +655,6 @@ - - - @@ -721,7 +715,7 @@ 0.5 - [-\w ,/\n\"']{20,200} + [-\w ,/\n\"']{20,200} diff --git a/solr/example/example-DIH/solr/rss/conf/rss-data-config.xml b/solr/example/example-DIH/solr/rss/conf/rss-data-config.xml index 563b547c22b..4e4e38557d6 100644 --- a/solr/example/example-DIH/solr/rss/conf/rss-data-config.xml +++ b/solr/example/example-DIH/solr/rss/conf/rss-data-config.xml @@ -5,17 +5,22 @@ pk="link" url="http://rss.slashdot.org/Slashdot/slashdot" processor="XPathEntityProcessor" - forEach="/rss/channel/item" + forEach="/RDF/channel | /RDF/item" transformer="DateFormatTransformer"> + + + + - - - - - - - - + + + + + + + + + diff --git a/solr/example/example-DIH/solr/rss/conf/schema.xml b/solr/example/example-DIH/solr/rss/conf/schema.xml index d65e23b612e..d77854f00e3 100644 --- a/solr/example/example-DIH/solr/rss/conf/schema.xml +++ b/solr/example/example-DIH/solr/rss/conf/schema.xml @@ -294,10 +294,11 @@ - - - - + + + + + diff --git a/solr/example/example-DIH/solr/rss/conf/solrconfig.xml b/solr/example/example-DIH/solr/rss/conf/solrconfig.xml index 897f3287a10..054e6f3bb2b 100644 --- a/solr/example/example-DIH/solr/rss/conf/solrconfig.xml +++ b/solr/example/example-DIH/solr/rss/conf/solrconfig.xml @@ -17,18 +17,17 @@ --> + + + LUCENE_40 - - ${solr.abortOnConfigurationError:true} - false @@ -350,112 +349,6 @@ - - - - - explicit - 0.01 - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 - - - ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3 - - - id,name,price,score - - - 2<-1 5<-2 6<90% - - 100 - *:* - - text features name - - 0 - - name - regex - - - - - - - explicit - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 - 2<-1 5<-2 6<90% - - incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 - - - - inStock:true - - - - cat - manu_exact - price:[* TO 500] - price:[500 TO *] - - - - - - - inStock:true - - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - 2<-1 5<-2 6<90% - - - - - - - 
- - @@ -643,7 +526,7 @@ 0.5 - [-\w ,/\n\"']{20,200} + [-\w ,/\n\"']{20,200} diff --git a/solr/example/example-DIH/solr/tika/conf/solrconfig.xml b/solr/example/example-DIH/solr/tika/conf/solrconfig.xml index c33d513efd4..c4dae4cb5cb 100644 --- a/solr/example/example-DIH/solr/tika/conf/solrconfig.xml +++ b/solr/example/example-DIH/solr/tika/conf/solrconfig.xml @@ -17,14 +17,14 @@ --> - - ${solr.abortOnConfigurationError:true} + + + LUCENE_40 diff --git a/solr/example/exampledocs/gb18030-example.xml b/solr/example/exampledocs/gb18030-example.xml new file mode 100644 index 00000000000..769be19d441 --- /dev/null +++ b/solr/example/exampledocs/gb18030-example.xml @@ -0,0 +1,32 @@ + + + + + + GB18030TEST + Test with some GB18030 encoded characters + No accents here + ÕâÊÇÒ»¸ö¹¦ÄÜ + This is a feature (translated) + Õâ·ÝÎļþÊǺÜÓйâÔó + This document is very shiny (translated) + 0 + true + + + diff --git a/solr/example/exampledocs/post.sh b/solr/example/exampledocs/post.sh index ee5fdbe9f0d..d9dd4ed718e 100755 --- a/solr/example/exampledocs/post.sh +++ b/solr/example/exampledocs/post.sh @@ -19,10 +19,10 @@ URL=http://localhost:8983/solr/update for f in $FILES; do echo Posting file $f to $URL - curl $URL --data-binary @$f -H 'Content-type:text/xml; charset=utf-8' + curl $URL --data-binary @$f -H 'Content-type:application/xml' echo done #send the commit command to make sure all the changes are flushed and visible -curl $URL --data-binary '' -H 'Content-type:text/xml; charset=utf-8' +curl $URL --data-binary '' -H 'Content-type:application/xml' echo diff --git a/solr/example/solr/conf/schema.xml b/solr/example/solr/conf/schema.xml index 563b6732ae6..bad81dbbba8 100755 --- a/solr/example/solr/conf/schema.xml +++ b/solr/example/solr/conf/schema.xml @@ -376,6 +376,11 @@ + + + + + diff --git a/solr/example/solr/conf/synonyms.txt b/solr/example/solr/conf/synonyms.txt index b0e31cb7ec8..7f72128303b 100644 --- a/solr/example/solr/conf/synonyms.txt +++ b/solr/example/solr/conf/synonyms.txt @@ -12,11 +12,9 @@ #----------------------------------------------------------------------- #some test synonym mappings unlikely to appear in real input text -aaa => aaaa -bbb => bbbb1 bbbb2 -ccc => cccc1,cccc2 -a\=>a => b\=>b -a\,a => b\,b +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz fooaaa,baraaa,bazaaa # Some synonym groups specific to this example diff --git a/solr/example/start.jar b/solr/example/start.jar index 2bd8f2d6eb5..b2fca2178f2 100755 --- a/solr/example/start.jar +++ b/solr/example/start.jar @@ -1,2 +1,2 @@ -AnyObjectId[2a4a9a163d79f9214d9b1d9c0dbb611f741d8f16] was removed in git history. +AnyObjectId[d3a94bcfae630a90d4103437bd3c2da0d37d98c9] was removed in git history. Apache SVN contains full history. \ No newline at end of file diff --git a/solr/lib/apache-solr-noggit-r944541.jar b/solr/lib/apache-solr-noggit-r944541.jar index e9021a53f24..e0624dd525f 100755 --- a/solr/lib/apache-solr-noggit-r944541.jar +++ b/solr/lib/apache-solr-noggit-r944541.jar @@ -1,2 +1,2 @@ -AnyObjectId[a798b805d0ce92606697cc1b2aac42bf416076e3] was removed in git history. +AnyObjectId[9b434f5760dd0d78350bdf8237273c0d5db0174e] was removed in git history. Apache SVN contains full history. 
\ No newline at end of file diff --git a/solr/site/features.html b/solr/site/features.html index c39c23e4b84..b195d27f326 100755 --- a/solr/site/features.html +++ b/solr/site/features.html @@ -48,12 +48,12 @@ |start Search +--> - - 4.0.0 - - - org.apache.solr - solr-parent - @version@ - - - org.apache.solr - solr-core - Apache Solr Core - @version@ - Apache Solr Server - jar - - - - - - org.apache.solr - solr-solrj - @version@ - - - - - org.apache.lucene - lucene-core - @version@ - - - org.apache.lucene - lucene-analyzers-common - @version@ - - - org.apache.lucene - lucene-analyzers-phonetic - @version@ - - - org.apache.lucene - lucene-highlighter - @version@ - - - org.apache.lucene - lucene-memory - @version@ - - - org.apache.lucene - lucene-misc - @version@ - - - org.apache.lucene - lucene-queries - @version@ - - - org.apache.lucene - lucene-spatial - @version@ - - - org.apache.lucene - lucene-spellchecker - @version@ - - - - - org.apache.zookeeper - zookeeper - 3.3.1 - - - - - commons-httpclient - commons-httpclient - 3.1 - - - commons-io - commons-io - 1.4 - - - commons-codec - commons-codec - 1.4 - - - commons-lang - commons-lang - 2.4 - - - commons-fileupload - commons-fileupload - 1.2.1 - - - org.apache.velocity - velocity - 1.6.4 - - - org.apache.velocity - velocity-tools - 2.0 - - - - - org.apache.solr - solr-commons-csv - @version@ - - - - - diff --git a/solr/src/maven/solr-solrj-pom.xml.template b/solr/src/maven/solr-solrj-pom.xml.template deleted file mode 100644 index 0d961250740..00000000000 --- a/solr/src/maven/solr-solrj-pom.xml.template +++ /dev/null @@ -1,72 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @version@ - - - org.apache.solr - solr-solrj - Apache Solr Solrj - @version@ - Apache Solr Solrj - jar - - - - - - org.slf4j - slf4j-api - 1.5.5 - - - - - commons-httpclient - commons-httpclient - 3.1 - - - commons-codec - commons-codec - 1.3 - - - commons-io - commons-io - 1.4 - - - commons-fileupload - commons-fileupload - 1.2.1 - - - - - diff --git a/solr/src/site/src/documentation/skins/lucene/css/screen.css b/solr/src/site/src/documentation/skins/lucene/css/screen.css index aa8c457cb30..4e2e040c84a 100644 --- a/solr/src/site/src/documentation/skins/lucene/css/screen.css +++ b/solr/src/site/src/documentation/skins/lucene/css/screen.css @@ -95,7 +95,7 @@ html>body #top .searchbox { #top .searchbox { position: absolute; right: 10px; - height: 42px; + height: 28px; font-size: 70%; white-space: nowrap; text-align: right; diff --git a/solr/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl b/solr/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl index 3b14cdae508..bbac540b143 100644 --- a/solr/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl +++ b/solr/src/site/src/documentation/skins/lucene/xslt/html/site-to-xhtml.xsl @@ -215,15 +215,20 @@ footer, searchbar, css etc. As input, it takes XML of the form: -
    +   - + + @ +
    -
    diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java index a2fbddc77d1..37bf449a76c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java @@ -36,6 +36,7 @@ import java.util.List; */ public class BinaryRequestWriter extends RequestWriter { + @Override public Collection getContentStreams(SolrRequest req) throws IOException { if (req instanceof UpdateRequest) { UpdateRequest updateRequest = (UpdateRequest) req; @@ -55,10 +56,12 @@ public class BinaryRequestWriter extends RequestWriter { } + @Override public String getUpdateContentType() { return "application/octet-stream"; } + @Override public ContentStream getContentStream(final UpdateRequest request) throws IOException { final BAOS baos = new BAOS(); new JavaBinUpdateRequestCodec().marshal(request, baos); @@ -91,6 +94,7 @@ public class BinaryRequestWriter extends RequestWriter { } + @Override public void write(SolrRequest request, OutputStream os) throws IOException { if (request instanceof UpdateRequest) { UpdateRequest updateRequest = (UpdateRequest) request; @@ -106,6 +110,7 @@ public class BinaryRequestWriter extends RequestWriter { } } + @Override public String getPath(SolrRequest req) { if (req instanceof UpdateRequest) { return "/update/javabin"; diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java index 4b29ce8b52b..13794601adb 100755 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java @@ -30,10 +30,12 @@ import java.io.Reader; * @since solr 1.3 */ public class BinaryResponseParser extends ResponseParser { + @Override public String getWriterType() { return "javabin"; } + @Override public NamedList processResponse(InputStream body, String encoding) { try { return (NamedList) new JavaBinCodec().unmarshal(body); @@ -44,10 +46,12 @@ public class BinaryResponseParser extends ResponseParser { } + @Override public String getVersion() { return "2"; } + @Override public NamedList processResponse(Reader reader) { throw new RuntimeException("Cannot handle character stream"); } diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/CloudSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/CloudSolrServer.java index 60955330996..1268c402589 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/CloudSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/CloudSolrServer.java @@ -1,5 +1,22 @@ package org.apache.solr.client.solrj.impl; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + import java.io.IOException; import java.net.MalformedURLException; import java.util.ArrayList; diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java index 6c2c1f76e7f..477a90035b7 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java @@ -20,7 +20,6 @@ package org.apache.solr.client.solrj.impl; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.Reader; import java.net.MalformedURLException; import java.net.URL; import java.util.*; @@ -62,6 +61,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** + * The {@link CommonsHttpSolrServer} uses the Apache Commons HTTP Client to connect to solr. + *
    SolrServer server = new CommonsHttpSolrServer( url );
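The added Javadoc shows only how the client is constructed. As a hedged illustration of how this solrj class is typically exercised end to end (the URL, the field value, and the query below are illustrative assumptions, not part of this patch), a minimal sketch:

import java.io.IOException;
import java.net.MalformedURLException;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;

public class CommonsHttpSolrServerUsageSketch {
  public static void main(String[] args)
      throws MalformedURLException, SolrServerException, IOException {
    // Illustrative URL; point this at a running Solr instance.
    SolrServer server = new CommonsHttpSolrServer("http://localhost:8983/solr");

    // Index a single document and make it visible.
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "example-1");
    server.add(doc);
    server.commit();

    // Query it back and read the hit count.
    QueryResponse rsp = server.query(new SolrQuery("id:example-1"));
    System.out.println("hits: " + rsp.getResults().getNumFound());
  }
}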
    * * @version $Id$ * @since solr 1.3 @@ -335,11 +336,11 @@ public class CommonsHttpSolrServer extends SolrServer @Override protected void sendData(OutputStream out) throws IOException { - Reader reader = c.getReader(); + InputStream in = c.getStream(); try { - IOUtils.copy(reader, out); + IOUtils.copy(in, out); } finally { - reader.close(); + in.close(); } } }); diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java index b13b708e0bf..6922223f34f 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java @@ -105,6 +105,7 @@ public class LBHttpSolrServer extends SolrServer { this.solrServer = solrServer; } + @Override public String toString() { return solrServer.getBaseURL(); } @@ -149,7 +150,7 @@ public class LBHttpSolrServer extends SolrServer { return numDeadServersToTry; } - /** @return The number of dead servers to try if there are no live servers left. + /** @param numDeadServersToTry The number of dead servers to try if there are no live servers left. * Defaults to the number of servers in this request. */ public void setNumDeadServersToTry(int numDeadServersToTry) { this.numDeadServersToTry = numDeadServersToTry; @@ -376,6 +377,7 @@ public class LBHttpSolrServer extends SolrServer { * @throws SolrServerException * @throws IOException */ + @Override public NamedList request(final SolrRequest request) throws SolrServerException, IOException { Exception ex = null; @@ -535,6 +537,7 @@ public class LBHttpSolrServer extends SolrServer { return httpClient; } + @Override protected void finalize() throws Throwable { try { if(this.aliveCheckExecutor!=null) diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java index bcbf378552f..b0b90864296 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java @@ -48,12 +48,14 @@ public class StreamingBinaryResponseParser extends BinaryResponseParser { try { JavaBinCodec codec = new JavaBinCodec() { + @Override public SolrDocument readSolrDocument(FastInputStream dis) throws IOException { SolrDocument doc = super.readSolrDocument(dis); callback.streamSolrDocument( doc ); return null; } + @Override public SolrDocumentList readSolrDocumentList(FastInputStream dis) throws IOException { SolrDocumentList solrDocs = new SolrDocumentList(); List list = (List) readVal(dis); diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java index 4460dfb2ce0..c47f4a09957 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java @@ -173,12 +173,20 @@ public class StreamingUpdateSolrServer extends CommonsHttpSolrServer } catch (Throwable e) { handleError( e ); - } + } finally { - // remove it from the list of running things... + + // remove it from the list of running things unless we are the last runner and the queue is full... + // in which case, the next queue.put() would block and there would be no runners to handle it. 
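The two comment lines added above capture the reason for this fix: if the last live runner exits while the request queue is still full, the next queue.put() blocks forever because nothing is left to drain the queue. A minimal, self-contained sketch of that keep-alive pattern, assuming a simplified runner/queue pair (this is not the Solr code itself; class and field names are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class LastRunnerKeepAliveSketch {
  private final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(16);
  private final List<Runner> runners = new ArrayList<Runner>();
  private final ExecutorService scheduler = Executors.newCachedThreadPool();

  class Runner implements Runnable {
    public void run() {
      try {
        // Drain whatever is currently queued.
        String item;
        while ((item = queue.poll()) != null) {
          process(item);
        }
      } finally {
        synchronized (runners) {
          if (runners.size() == 1 && queue.remainingCapacity() == 0) {
            // Last runner and the queue has refilled: resubmit ourselves
            // instead of exiting, so a blocked put() always has a consumer.
            scheduler.execute(this);
          } else {
            runners.remove(this);
          }
        }
      }
    }
  }

  void process(String item) {
    // Placeholder for the real work (e.g. sending an update request).
  }
}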
synchronized (runners) { - runners.remove( this ); + if (runners.size() == 1 && queue.remainingCapacity() == 0) { + // keep this runner alive + scheduler.execute(this); + } else { + runners.remove( this ); + } } + log.info( "finished: {}" , this ); runnerLock.unlock(); } @@ -208,7 +216,7 @@ public class StreamingUpdateSolrServer extends CommonsHttpSolrServer return super.request( request ); } } - + try { CountDownLatch tmpLock = lock; if( tmpLock != null ) { @@ -216,18 +224,18 @@ public class StreamingUpdateSolrServer extends CommonsHttpSolrServer } queue.put( req ); - - synchronized( runners ) { - if( runners.isEmpty() - || (queue.remainingCapacity() < queue.size() - && runners.size() < threadCount) ) - { + + synchronized( runners ) { + if( runners.isEmpty() + || (queue.remainingCapacity() < queue.size() + && runners.size() < threadCount) ) + { Runner r = new Runner(); scheduler.execute( r ); runners.add( r ); } } - } + } catch (InterruptedException e) { log.error( "interrupted", e ); throw new IOException( e.getLocalizedMessage() ); diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java b/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java index a45412584f8..0f5842e632e 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java @@ -67,6 +67,7 @@ public class JavaBinUpdateRequestCodec { nl.add("delByQ", updateRequest.getDeleteQuery()); nl.add("docs", docIter); new JavaBinCodec(){ + @Override public void writeMap(Map val) throws IOException { if (val instanceof SolrInputDocument) { writeVal(solrInputDocumentToList((SolrInputDocument) val)); @@ -101,6 +102,7 @@ public class JavaBinUpdateRequestCodec { // is ever refactored, this will not work. private boolean seenOuterMostDocIterator = false; + @Override public NamedList readNamedList(FastInputStream dis) throws IOException { int sz = readSize(dis); NamedList nl = new NamedList(); @@ -115,6 +117,7 @@ public class JavaBinUpdateRequestCodec { return nl; } + @Override public List readIterator(FastInputStream fis) throws IOException { // default behavior for reading any regular Iterator in the stream diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java b/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java index f98d2a0b364..55d87d4fbe8 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java @@ -62,12 +62,12 @@ public class AnalysisResponseBase extends SolrResponseBase { * * @return The built analysis phases list. */ - protected List buildPhases(NamedList phaseNL) { + protected List buildPhases(NamedList>> phaseNL) { List phases = new ArrayList(phaseNL.size()); - for (Map.Entry phaseEntry : phaseNL) { + for (Map.Entry>> phaseEntry : phaseNL) { AnalysisPhase phase = new AnalysisPhase(phaseEntry.getKey()); - List tokens = (List) phaseEntry.getValue(); - for (NamedList token : tokens) { + List> tokens = phaseEntry.getValue(); + for (NamedList token : tokens) { TokenInfo tokenInfo = buildTokenInfo(token); phase.addTokenInfo(tokenInfo); } @@ -95,7 +95,7 @@ public class AnalysisResponseBase extends SolrResponseBase { * * @return The built token info. 
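The solrj response classes in the hunks around this point (AnalysisResponseBase above, then DocumentAnalysisResponse, FieldAnalysisResponse, FieldStatsInfo, QueryResponse and SpellCheckResponse below) all get the same treatment: raw NamedList handling is replaced with parameterized types, and any unavoidable unchecked cast is confined to a single local declaration under @SuppressWarnings("unchecked"). A small hedged sketch of that idiom, with an illustrative key name and element type:

import java.util.Map;

import org.apache.solr.common.util.NamedList;

class NarrowSuppressionSketch {
  void readAnalysis(NamedList<Object> response) {
    // Keep the suppression on the one declaration that needs it rather than
    // annotating the whole method or class.
    @SuppressWarnings("unchecked")
    NamedList<NamedList<Object>> analysis =
        (NamedList<NamedList<Object>>) response.get("analysis");

    // NamedList iterates as Map.Entry pairs, so the typed loop needs no
    // further casts.
    for (Map.Entry<String, NamedList<Object>> entry : analysis) {
      String sectionName = entry.getKey();
      NamedList<Object> section = entry.getValue();
      // ... process the typed section ...
    }
  }
}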
*/ - protected TokenInfo buildTokenInfo(NamedList tokenNL) { + protected TokenInfo buildTokenInfo(NamedList tokenNL) { String text = (String) tokenNL.get("text"); String rawText = (String) tokenNL.get("rawText"); String type = (String) tokenNL.get("type"); diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java index 9cc66b9f384..f0d5dff787e 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java @@ -42,24 +42,30 @@ public class DocumentAnalysisResponse extends AnalysisResponseBase implements It public void setResponse(NamedList response) { super.setResponse(response); - NamedList analysis = (NamedList) response.get("analysis"); - for (Map.Entry documentEntry : analysis) { - DocumentAnalysis documentAnalysis = new DocumentAnalysis(documentEntry.getKey()); - NamedList document = (NamedList) documentEntry.getValue(); - for (Map.Entry fieldEntry : document) { + @SuppressWarnings("unchecked") + NamedList>> analysis + = (NamedList>>) response.get("analysis"); + for (Map.Entry>> document : analysis) { + DocumentAnalysis documentAnalysis = new DocumentAnalysis(document.getKey()); + for (Map.Entry> fieldEntry : document.getValue()) { FieldAnalysis fieldAnalysis = new FieldAnalysis(fieldEntry.getKey()); - NamedList field = (NamedList) fieldEntry.getValue(); - NamedList query = (NamedList) field.get("query"); + NamedList field = fieldEntry.getValue(); + + @SuppressWarnings("unchecked") + NamedList>> query + = (NamedList>>) field.get("query"); if (query != null) { List phases = buildPhases(query); fieldAnalysis.setQueryPhases(phases); } - - NamedList index = (NamedList) field.get("index"); - for (Map.Entry valueEntry : index) { + + @SuppressWarnings("unchecked") + NamedList>>> index + = (NamedList>>>) field.get("index"); + for (Map.Entry>>> valueEntry : index) { String fieldValue = valueEntry.getKey(); - NamedList valueNL = (NamedList) valueEntry.getValue(); + NamedList>> valueNL = valueEntry.getValue(); List phases = buildPhases(valueNL); fieldAnalysis.setIndexPhases(fieldValue, phases); } diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java index 722c2c96cc8..e7343647db8 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java @@ -42,35 +42,35 @@ public class FieldAnalysisResponse extends AnalysisResponseBase { public void setResponse(NamedList response) { super.setResponse(response); - NamedList analysisNL = (NamedList) response.get("analysis"); + @SuppressWarnings("unchecked") + NamedList>>>>> analysisNL + = (NamedList>>>>>) response.get("analysis"); - NamedList fieldTypesNL = (NamedList) analysisNL.get("field_types"); - for (Map.Entry entry : fieldTypesNL) { - Analysis analysis = new Analysis(); - NamedList fieldTypeNL = (NamedList) entry.getValue(); - NamedList queryNL = (NamedList) fieldTypeNL.get("query"); - List phases = (queryNL == null) ? 
null : buildPhases(queryNL); - analysis.setQueryPhases(phases); - NamedList indexNL = (NamedList) fieldTypeNL.get("index"); - phases = buildPhases(indexNL); - analysis.setIndexPhases(phases); - String fieldTypeName = entry.getKey(); - analysisByFieldTypeName.put(fieldTypeName, analysis); + for (Map.Entry>>>> entry + : analysisNL.get("field_types")) { + + analysisByFieldTypeName.put(entry.getKey(), buildAnalysis(entry.getValue())); } - NamedList fieldNamesNL = (NamedList) analysisNL.get("field_names"); - for (Map.Entry entry : fieldNamesNL) { + for (Map.Entry>>>> entry + : analysisNL.get("field_names")) { + + analysisByFieldName.put(entry.getKey(), buildAnalysis(entry.getValue())); + } + } + + private Analysis buildAnalysis(NamedList>>> value) { Analysis analysis = new Analysis(); - NamedList fieldNameNL = (NamedList) entry.getValue(); - NamedList queryNL = (NamedList) fieldNameNL.get("query"); + + NamedList>> queryNL = value.get("query"); List phases = (queryNL == null) ? null : buildPhases(queryNL); analysis.setQueryPhases(phases); - NamedList indexNL = (NamedList) fieldNameNL.get("index"); + + NamedList>> indexNL = value.get("index"); phases = buildPhases(indexNL); analysis.setIndexPhases(phases); - String fieldName = entry.getKey(); - analysisByFieldName.put(fieldName, analysis); - } + + return analysis; } /** diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java index aa35c7d6ac4..d0154c29698 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java @@ -74,11 +74,13 @@ public class FieldStatsInfo implements Serializable { stddev = (Double)entry.getValue(); } else if( "facets".equals( entry.getKey() ) ) { + @SuppressWarnings("unchecked") NamedList fields = (NamedList)entry.getValue(); facets = new HashMap>(); for( Map.Entry ev : fields ) { List vals = new ArrayList(); facets.put( ev.getKey(), vals ); + @SuppressWarnings("unchecked") NamedList> vnl = (NamedList>) ev.getValue(); for( int i=0; i _facetInfo = null; private NamedList _debugInfo = null; private NamedList _highlightingInfo = null; - private NamedList _spellInfo = null; + private NamedList> _spellInfo = null; private NamedList _statsInfo = null; - private NamedList _termsInfo = null; + private NamedList> _termsInfo = null; // Facet stuff private Map _facetQuery = null; @@ -105,7 +105,8 @@ public class QueryResponse extends SolrResponseBase } else if( "facet_counts".equals( n ) ) { _facetInfo = (NamedList) res.getVal( i ); - extractFacetInfo( _facetInfo ); + // extractFacetInfo inspects _results, so defer calling it + // in case it hasn't been populated yet. 
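The comment added just above records an ordering hazard: extractFacetInfo reads _results, so it must not run until the loop over the response sections has had a chance to populate _results. A hedged sketch of the resulting record-then-extract pattern (field and method names mirror the diff; the loop body is abbreviated):

import org.apache.solr.common.util.NamedList;

class DeferredExtractionSketch {
  private Object results;            // filled in while walking the response
  private NamedList<?> facetInfo;    // remembered here, processed later

  void setResponse(NamedList<Object> res) {
    for (int i = 0; i < res.size(); i++) {
      String n = res.getName(i);
      if ("response".equals(n)) {
        results = res.getVal(i);
      } else if ("facet_counts".equals(n)) {
        // Only record the section; extracting it now could observe a null
        // 'results' if the "response" section appears later in the list.
        facetInfo = (NamedList<?>) res.getVal(i);
      }
    }
    // Safe point: every section has been seen, so 'results' is set if present.
    if (facetInfo != null) {
      extractFacetInfo(facetInfo);
    }
  }

  void extractFacetInfo(NamedList<?> info) {
    // Uses 'results' as well as 'info'.
  }
}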
} else if( "debug".equals( n ) ) { _debugInfo = (NamedList) res.getVal( i ); @@ -116,7 +117,7 @@ public class QueryResponse extends SolrResponseBase extractHighlightingInfo( _highlightingInfo ); } else if ( "spellcheck".equals( n ) ) { - _spellInfo = (NamedList) res.getVal( i ); + _spellInfo = (NamedList>) res.getVal( i ); extractSpellCheckInfo( _spellInfo ); } else if ( "stats".equals( n ) ) { @@ -124,17 +125,18 @@ public class QueryResponse extends SolrResponseBase extractStatsInfo( _statsInfo ); } else if ( "terms".equals( n ) ) { - _termsInfo = (NamedList) res.getVal( i ); + _termsInfo = (NamedList>) res.getVal( i ); extractTermsInfo( _termsInfo ); } } + if(_facetInfo != null) extractFacetInfo( _facetInfo ); } - private void extractSpellCheckInfo(NamedList spellInfo) { + private void extractSpellCheckInfo(NamedList> spellInfo) { _spellResponse = new SpellCheckResponse(spellInfo); } - private void extractTermsInfo(NamedList termsInfo) { + private void extractTermsInfo(NamedList> termsInfo) { _termsResponse = new TermsResponse(termsInfo); } diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java index 25eb3c71c88..c5bb419ee87 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java @@ -35,8 +35,8 @@ public class SpellCheckResponse { private List suggestions = new ArrayList(); Map suggestionMap = new LinkedHashMap(); - public SpellCheckResponse(NamedList spellInfo) { - NamedList sugg = (NamedList) spellInfo.get("suggestions"); + public SpellCheckResponse(NamedList> spellInfo) { + NamedList sugg = spellInfo.get("suggestions"); if (sugg == null) { correctlySpelled = true; return; @@ -55,12 +55,14 @@ public class SpellCheckResponse { collations.add(new Collation() .setCollationQueryString((String) sugg.getVal(i))); } else if (o instanceof NamedList) { - NamedList expandedCollation = (NamedList) o; - String collationQuery = (String) expandedCollation - .get("collationQuery"); + @SuppressWarnings("unchecked") + NamedList expandedCollation = (NamedList) o; + String collationQuery + = (String) expandedCollation.get("collationQuery"); int hits = (Integer) expandedCollation.get("hits"); - NamedList misspellingsAndCorrections = (NamedList) expandedCollation - .get("misspellingsAndCorrections"); + @SuppressWarnings("unchecked") + NamedList misspellingsAndCorrections + = (NamedList) expandedCollation.get("misspellingsAndCorrections"); Collation collation = new Collation(); collation.setCollationQueryString(collationQuery); @@ -79,6 +81,7 @@ public class SpellCheckResponse { } } } else { + @SuppressWarnings("unchecked") Suggestion s = new Suggestion(n, (NamedList) sugg.getVal(i)); suggestionMap.put(n, s); suggestions.add(s); @@ -112,7 +115,7 @@ public class SpellCheckResponse { *

* Return the first collated query string, for convenience and backwards compatibility. Use getCollatedResults() for full data. *

    - * @return + * @return first collated query string */ public String getCollatedResult() { return collations==null || collations.size()==0 ? null : collations.get(0).collationQueryString; @@ -123,7 +126,7 @@ public class SpellCheckResponse { * Return all collations. * Will include # of hits and misspelling-to-correction details if "spellcheck.collateExtendedResults was true. *

    - * @return + * @return all collations */ public List getCollatedResults() { return collations; @@ -152,16 +155,21 @@ public class SpellCheckResponse { } else if ("origFreq".equals(n)) { originalFrequency = (Integer) suggestion.getVal(i); } else if ("suggestion".equals(n)) { + @SuppressWarnings("unchecked") List list = (List)suggestion.getVal(i); if (list.size() > 0 && list.get(0) instanceof NamedList) { // extended results detected + @SuppressWarnings("unchecked") + List extended = (List)list; alternativeFrequencies = new ArrayList(); - for (NamedList nl : (List)list) { + for (NamedList nl : extended) { alternatives.add((String)nl.get("word")); alternativeFrequencies.add((Integer)nl.get("freq")); } } else { - alternatives.addAll(list); + @SuppressWarnings("unchecked") + List alts = (List) list; + alternatives.addAll(alts); } } } diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/TermsResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/TermsResponse.java index f0b28b405c2..0924a1041f1 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/TermsResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/TermsResponse.java @@ -29,14 +29,14 @@ import java.util.Map; public class TermsResponse { private Map> termMap = new HashMap>(); - public TermsResponse(NamedList termsInfo) { + public TermsResponse(NamedList> termsInfo) { for (int i = 0; i < termsInfo.size(); i++) { String fieldName = termsInfo.getName(i); List itemList = new ArrayList(); - NamedList items = (NamedList) termsInfo.getVal(i); + NamedList items = termsInfo.getVal(i); for (int j = 0; j < items.size(); j++) { - Term t = new Term(items.getName(j), ((Number) items.getVal(j)).longValue()); + Term t = new Term(items.getName(j), items.getVal(j).longValue()); itemList.add(t); } diff --git a/solr/src/test-files/solr/conf/schema-copyfield-test.xml b/solr/src/test-files/solr/conf/schema-copyfield-test.xml index 2332e8c5ac4..d294af661c2 100644 --- a/solr/src/test-files/solr/conf/schema-copyfield-test.xml +++ b/solr/src/test-files/solr/conf/schema-copyfield-test.xml @@ -202,13 +202,14 @@ - + - + + diff --git a/solr/src/test-files/solr/conf/schema-required-fields.xml b/solr/src/test-files/solr/conf/schema-required-fields.xml index 1535c8a72b9..f17948476f7 100644 --- a/solr/src/test-files/solr/conf/schema-required-fields.xml +++ b/solr/src/test-files/solr/conf/schema-required-fields.xml @@ -193,13 +193,14 @@ - + - + + diff --git a/solr/src/test-files/solr/conf/schema.xml b/solr/src/test-files/solr/conf/schema.xml index cf10b9e71ab..490bfc75b05 100644 --- a/solr/src/test-files/solr/conf/schema.xml +++ b/solr/src/test-files/solr/conf/schema.xml @@ -236,13 +236,14 @@ - + - + + @@ -401,8 +402,8 @@ - - + + @@ -479,9 +480,10 @@ - + + @@ -506,27 +508,40 @@ both match, the first appearing in the schema will be used. 
--> + + + + + + + + + + + - + + + diff --git a/solr/src/test-files/solr/conf/schema12.xml b/solr/src/test-files/solr/conf/schema12.xml index 2d89dcc00c8..c8a60840ee1 100755 --- a/solr/src/test-files/solr/conf/schema12.xml +++ b/solr/src/test-files/solr/conf/schema12.xml @@ -252,13 +252,14 @@ - + - + + @@ -286,14 +287,14 @@ - + - + @@ -303,14 +304,14 @@ - + - + @@ -375,7 +376,7 @@ - + @@ -384,7 +385,7 @@ - + @@ -397,7 +398,7 @@ - + @@ -544,6 +545,8 @@ + + diff --git a/solr/src/test-files/solr/conf/solrconfig-repeater.xml b/solr/src/test-files/solr/conf/solrconfig-repeater.xml new file mode 100644 index 00000000000..4584dfaba45 --- /dev/null +++ b/solr/src/test-files/solr/conf/solrconfig-repeater.xml @@ -0,0 +1,93 @@ + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:./solr/data} + + + false + 10 + 32 + 2147483647 + 10000 + 1000 + 10000 + + 1000 + 10000 + + single + + + + false + 10 + 32 + 2147483647 + 10000 + + true + + + + + + + true + + + + + + + + + + + + + + + + commit + schema.xml + + + http://localhost:TEST_PORT/solr/replication + 00:00:01 + + + + + + + + + max-age=30, public + + + + diff --git a/solr/src/test/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/src/test/org/apache/solr/BaseDistributedSearchTestCase.java index 945d54da3e1..c32ccc52920 100644 --- a/solr/src/test/org/apache/solr/BaseDistributedSearchTestCase.java +++ b/solr/src/test/org/apache/solr/BaseDistributedSearchTestCase.java @@ -1,5 +1,22 @@ package org.apache.solr; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -81,24 +98,28 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 { public static Logger log = LoggerFactory.getLogger(BaseDistributedSearchTestCase.class); public static RandVal rint = new RandVal() { + @Override public Object val() { return r.nextInt(); } }; public static RandVal rlong = new RandVal() { + @Override public Object val() { return r.nextLong(); } }; public static RandVal rfloat = new RandVal() { + @Override public Object val() { return r.nextFloat(); } }; public static RandVal rdouble = new RandVal() { + @Override public Object val() { return r.nextDouble(); } @@ -113,7 +134,7 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 { */ public abstract void doTest() throws Exception; - public static String[] fieldNames = new String[]{"n_ti", "n_f", "n_tf", "n_d", "n_td", "n_l", "n_tl", "n_dt", "n_tdt"}; + public static String[] fieldNames = new String[]{"n_ti1", "n_f1", "n_tf1", "n_d1", "n_td1", "n_l1", "n_tl1", "n_dt1", "n_tdt1"}; public static RandVal[] randVals = new RandVal[]{rint, rfloat, rfloat, rdouble, rdouble, rlong, rlong, rdate, rdate}; protected String[] getFieldNames() { @@ -124,12 +145,21 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 { return randVals; } + /** + * Subclasses can override this to change a test's solr home + * (default is in test-files) + */ + public String getSolrHome() { + return SolrTestCaseJ4.TEST_HOME; + } + @Override public void setUp() throws Exception { SolrTestCaseJ4.resetExceptionIgnores(); // ignore anything with ignore_exception in it super.setUp(); System.setProperty("solr.test.sys.prop1", "propone"); System.setProperty("solr.test.sys.prop2", "proptwo"); + System.setProperty("solr.solr.home", getSolrHome()); testDir = new File(TEMP_DIR, getClass().getName() + "-" + System.currentTimeMillis()); testDir.mkdirs(); @@ -311,6 +341,7 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 { Thread[] threads = new Thread[nThreads]; for (int i = 0; i < threads.length; i++) { threads[i] = new Thread() { + @Override public void run() { for (int j = 0; j < stress; j++) { int which = r.nextInt(clients.size()); @@ -597,6 +628,7 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 { public static class RandDate extends RandVal { public static TrieDateField df = new TrieDateField(); + @Override public Object val() { long v = r.nextLong(); Date d = new Date(v); diff --git a/solr/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/src/test/org/apache/solr/BasicFunctionalityTest.java index 7662ca0eff1..a69fe52c927 100644 --- a/solr/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/solr/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -29,6 +29,8 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.LogMergePolicy; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.AppendedSolrParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.DefaultSolrParams; @@ -47,6 +49,8 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.update.SolrIndexWriter; + + import org.junit.BeforeClass; import 
org.junit.Test; @@ -221,10 +225,15 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { public void testRequestHandlerBaseException() { final String tmp = "BOO! ignore_exception"; SolrRequestHandler handler = new RequestHandlerBase() { + @Override public String getDescription() { return tmp; } + @Override public String getSourceId() { return tmp; } + @Override public String getSource() { return tmp; } + @Override public String getVersion() { return tmp; } + @Override public void handleRequestBody ( SolrQueryRequest req, SolrQueryResponse rsp ) { throw new RuntimeException(tmp); @@ -653,6 +662,39 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { "*[count(//doc)=1]"); } + @Test + public void testAbuseOfSort() { + + assertU(adoc("id", "9999991", + "sortabuse_b", "true", + "sortabuse_t", "zzz xxx ccc vvv bbb nnn aaa sss ddd fff ggg")); + assertU(adoc("id", "9999992", + "sortabuse_b", "true", + "sortabuse_t", "zzz xxx ccc vvv bbb nnn qqq www eee rrr ttt")); + + assertU(commit()); + + try { + assertQ("sort on something that shouldn't work", + req("q", "sortabuse_b:true", + "sort", "sortabuse_t asc"), + "*[count(//doc)=2]"); + fail("no error encountered when sorting on sortabuse_t"); + } catch (Exception outer) { + // EXPECTED + Throwable root = getRootCause(outer); + assertEquals("sort exception root cause", + SolrException.class, root.getClass()); + SolrException e = (SolrException) root; + assertEquals("incorrect error type", + SolrException.ErrorCode.BAD_REQUEST, + SolrException.ErrorCode.getErrorCode(e.code())); + assertTrue("exception doesn't contain field name", + -1 != e.getMessage().indexOf("sortabuse_t")); + } + } + + // /** this doesn't work, but if it did, this is how we'd test it. */ // public void testOverwriteFalse() { diff --git a/solr/src/test/org/apache/solr/ConvertedLegacyTest.java b/solr/src/test/org/apache/solr/ConvertedLegacyTest.java index f6f9d1b0c14..96dd599d172 100644 --- a/solr/src/test/org/apache/solr/ConvertedLegacyTest.java +++ b/solr/src/test/org/apache/solr/ConvertedLegacyTest.java @@ -123,9 +123,9 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { // test range assertU("44"); - assertU("44apple"); - assertU("44banana"); - assertU("44pear"); + assertU("44appleapple"); + assertU("44bananabanana"); + assertU("44pearpear"); assertU(""); assertQ(req("val_s:[a TO z]") ,"//*[@numFound='3'] " @@ -228,7 +228,7 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { args = new HashMap(); args.put("version","2.0"); args.put("defType","lucenePlusSort"); - req = new LocalSolrQueryRequest(h.getCore(), "val_s:[a TO z];val_s asc", + req = new LocalSolrQueryRequest(h.getCore(), "val_s:[a TO z];val_s1 asc", "standard", 0, 0 , args); assertQ(req ,"//*[@numFound='3'] " @@ -237,7 +237,7 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { args = new HashMap(); args.put("version","2.0"); args.put("defType","lucenePlusSort"); - req = new LocalSolrQueryRequest(h.getCore(), "val_s:[a TO z];val_s desc", + req = new LocalSolrQueryRequest(h.getCore(), "val_s:[a TO z];val_s1 desc", "standard", 0, 0 , args); assertQ(req ,"//*[@numFound='3'] " @@ -509,133 +509,133 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { // test integer ranges and sorting assertU("44"); - assertU("441234567890"); - assertU("4410"); - assertU("441"); - assertU("442"); - assertU("4415"); - assertU("44-1"); - assertU("44-987654321"); - assertU("442147483647"); - assertU("44-2147483648"); - assertU("440"); + assertU("441234567890"); + assertU("4410"); + 
assertU("441"); + assertU("442"); + assertU("4415"); + assertU("44-1"); + assertU("44-987654321"); + assertU("442147483647"); + assertU("44-2147483648"); + assertU("440"); assertU(""); assertQ(req("id:44") ,"*[count(//doc)=10]" ); - assertQ(req("num_i:2147483647") + assertQ(req("num_i1:2147483647") ,"//@numFound[.='1'] " ,"//int[.='2147483647']" ); - assertQ(req("num_i:\"-2147483648\"") + assertQ(req("num_i1:\"-2147483648\"") ,"//@numFound[.='1'] " ,"//int[.='-2147483648']" ); - assertQ(req("id:44;num_i asc;") + assertQ(req("id:44;num_i1 asc;") ,"//doc[1]/int[.='-2147483648'] " ,"//doc[last()]/int[.='2147483647']" ); - assertQ(req("id:44;num_i desc;") + assertQ(req("id:44;num_i1 desc;") ,"//doc[1]/int[.='2147483647'] " ,"//doc[last()]/int[.='-2147483648']" ); - assertQ(req("num_i:[0 TO 9]") + assertQ(req("num_i1:[0 TO 9]") ,"*[count(//doc)=3]" ); - assertQ(req("num_i:[-2147483648 TO 2147483647]") + assertQ(req("num_i1:[-2147483648 TO 2147483647]") ,"*[count(//doc)=10]" ); - assertQ(req("num_i:[-10 TO -1]") + assertQ(req("num_i1:[-10 TO -1]") ,"*[count(//doc)=1]" ); // test long ranges and sorting assertU("44"); - assertU("441234567890"); - assertU("4410"); - assertU("441"); - assertU("442"); - assertU("4415"); - assertU("44-1"); - assertU("44-987654321"); - assertU("449223372036854775807"); - assertU("44-9223372036854775808"); - assertU("440"); + assertU("441234567890"); + assertU("4410"); + assertU("441"); + assertU("442"); + assertU("4415"); + assertU("44-1"); + assertU("44-987654321"); + assertU("449223372036854775807"); + assertU("44-9223372036854775808"); + assertU("440"); assertU(""); assertQ(req("id:44") ,"*[count(//doc)=10]" ); - assertQ(req("num_l:9223372036854775807") + assertQ(req("num_l1:9223372036854775807") ,"//@numFound[.='1'] " ,"//long[.='9223372036854775807']" ); - assertQ(req("num_l:\"-9223372036854775808\"") + assertQ(req("num_l1:\"-9223372036854775808\"") ,"//@numFound[.='1'] " ,"//long[.='-9223372036854775808']" ); - assertQ(req("id:44;num_l asc;") + assertQ(req("id:44;num_l1 asc;") ,"//doc[1]/long[.='-9223372036854775808'] " ,"//doc[last()]/long[.='9223372036854775807']" ); - assertQ(req("id:44;num_l desc;") + assertQ(req("id:44;num_l1 desc;") ,"//doc[1]/long[.='9223372036854775807'] " ,"//doc[last()]/long[.='-9223372036854775808']" ); - assertQ(req("num_l:[-1 TO 9]") + assertQ(req("num_l1:[-1 TO 9]") ,"*[count(//doc)=4]" ); - assertQ(req("num_l:[-9223372036854775808 TO 9223372036854775807]") + assertQ(req("num_l1:[-9223372036854775808 TO 9223372036854775807]") ,"*[count(//doc)=10]" ); - assertQ(req("num_l:[-10 TO -1]") + assertQ(req("num_l1:[-10 TO -1]") ,"*[count(//doc)=1]" ); // test binary float ranges and sorting assertU("44"); - assertU("441.4142135"); - assertU("44Infinity"); - assertU("44-Infinity"); - assertU("44NaN"); - assertU("442"); - assertU("44-1"); - assertU("44-987654321"); - assertU("44-999999.99"); - assertU("44-1e20"); - assertU("440"); + assertU("441.4142135"); + assertU("44Infinity"); + assertU("44-Infinity"); + assertU("44NaN"); + assertU("442"); + assertU("44-1"); + assertU("44-987654321"); + assertU("44-999999.99"); + assertU("44-1e20"); + assertU("440"); assertU(""); assertQ(req("id:44") ,"*[count(//doc)=10]" ); - assertQ(req("num_sf:Infinity") + assertQ(req("num_sf1:Infinity") ,"//@numFound[.='1'] " ,"//float[.='Infinity']" ); - assertQ(req("num_sf:\"-Infinity\"") + assertQ(req("num_sf1:\"-Infinity\"") ,"//@numFound[.='1'] " ,"//float[.='-Infinity']" ); - assertQ(req("num_sf:\"NaN\"") + assertQ(req("num_sf1:\"NaN\"") ,"//@numFound[.='1'] " 
,"//float[.='NaN']" ); - assertQ(req("num_sf:\"-1e20\"") + assertQ(req("num_sf1:\"-1e20\"") ,"//@numFound[.='1']" ); - assertQ(req("id:44;num_sf asc;") + assertQ(req("id:44;num_sf1 asc;") ,"//doc[1]/float[.='-Infinity'] " ,"//doc[last()]/float[.='NaN']" ); - assertQ(req("id:44;num_sf desc;") + assertQ(req("id:44;num_sf1 desc;") ,"//doc[1]/float[.='NaN'] " ,"//doc[last()]/float[.='-Infinity']" ); - assertQ(req("num_sf:[-1 TO 2]") + assertQ(req("num_sf1:[-1 TO 2]") ,"*[count(//doc)=4]" ); - assertQ(req("num_sf:[-Infinity TO Infinity]") + assertQ(req("num_sf1:[-Infinity TO Infinity]") ,"*[count(//doc)=9]" ); @@ -644,50 +644,50 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { // test binary double ranges and sorting assertU("44"); - assertU("441.4142135"); - assertU("44Infinity"); - assertU("44-Infinity"); - assertU("44NaN"); - assertU("442"); - assertU("44-1"); - assertU("441e-100"); - assertU("44-999999.99"); - assertU("44-1e100"); - assertU("440"); + assertU("441.4142135"); + assertU("44Infinity"); + assertU("44-Infinity"); + assertU("44NaN"); + assertU("442"); + assertU("44-1"); + assertU("441e-100"); + assertU("44-999999.99"); + assertU("44-1e100"); + assertU("440"); assertU(""); assertQ(req("id:44") ,"*[count(//doc)=10]" ); - assertQ(req("num_sd:Infinity") + assertQ(req("num_sd1:Infinity") ,"//@numFound[.='1'] " ,"//double[.='Infinity']" ); - assertQ(req("num_sd:\"-Infinity\"") + assertQ(req("num_sd1:\"-Infinity\"") ,"//@numFound[.='1'] " ,"//double[.='-Infinity']" ); - assertQ(req("num_sd:\"NaN\"") + assertQ(req("num_sd1:\"NaN\"") ,"//@numFound[.='1'] " ,"//double[.='NaN']" ); - assertQ(req("num_sd:\"-1e100\"") + assertQ(req("num_sd1:\"-1e100\"") ,"//@numFound[.='1']" ); - assertQ(req("num_sd:\"1e-100\"") + assertQ(req("num_sd1:\"1e-100\"") ,"//@numFound[.='1']" ); - assertQ(req("id:44;num_sd asc;") + assertQ(req("id:44;num_sd1 asc;") ,"//doc[1]/double[.='-Infinity'] " ,"//doc[last()]/double[.='NaN']" ); - assertQ(req("id:44;num_sd desc;") + assertQ(req("id:44;num_sd1 desc;") ,"//doc[1]/double[.='NaN'] " ,"//doc[last()]/double[.='-Infinity']" ); - assertQ(req("num_sd:[-1 TO 2]") + assertQ(req("num_sd1:[-1 TO 2]") ,"*[count(//doc)=5]" ); - assertQ(req("num_sd:[-Infinity TO Infinity]") + assertQ(req("num_sd1:[-Infinity TO Infinity]") ,"*[count(//doc)=9]" ); @@ -695,38 +695,38 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { // test sorting on multiple fields assertU("44"); - assertU("4410"); - assertU("441100"); - assertU("44-1"); - assertU("4415"); - assertU("44150"); - assertU("440"); + assertU("4410"); + assertU("441100"); + assertU("44-1"); + assertU("4415"); + assertU("44150"); + assertU("440"); assertU(""); assertQ(req("id:44") ,"*[count(//doc)=6]" ); - assertQ(req("id:44; a_i asc,b_i desc") + assertQ(req("id:44; a_i1 asc,b_i1 desc") ,"*[count(//doc)=6] " ,"//doc[3]/int[.='100'] " ,"//doc[4]/int[.='50']" ); - assertQ(req("id:44;a_i asc , b_i asc;") + assertQ(req("id:44;a_i1 asc , b_i1 asc;") ,"*[count(//doc)=6] " ,"//doc[3]/int[.='50'] " ,"//doc[4]/int[.='100']" ); - assertQ(req("id:44;a_i asc;") + assertQ(req("id:44;a_i1 asc;") ,"*[count(//doc)=6] " ,"//doc[1]/int[.='-1'] " ,"//doc[last()]/int[.='15']" ); - assertQ(req("id:44;a_i asc , score top;") + assertQ(req("id:44;a_i1 asc , score top;") ,"*[count(//doc)=6] " ,"//doc[1]/int[.='-1'] " ,"//doc[last()]/int[.='15']" ); - assertQ(req("id:44; score top , a_i top, b_i bottom ;") + assertQ(req("id:44; score top , a_i1 top, b_i1 bottom ;") ,"*[count(//doc)=6] " ,"//doc[last()]/int[.='-1'] " 
,"//doc[1]/int[.='15'] " @@ -738,13 +738,13 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { // test sorting with some docs missing the sort field assertU("id_i:[1000 TO 1010]"); - assertU("10001Z"); - assertU("100110A"); - assertU("10021100"); - assertU("1003-1"); - assertU("100415"); - assertU("1005150"); - assertU("10060"); + assertU("10001Z"); + assertU("100110A"); + assertU("10021100"); + assertU("1003-1"); + assertU("100415"); + assertU("1005150"); + assertU("10060"); assertU(""); assertQ(req("id_i:[1000 TO 1010]") ,"*[count(//doc)=7]" @@ -759,13 +759,13 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { ,"//doc[1]/int[.='100'] " ,"//doc[2]/int[.='50']" ); - assertQ(req("id_i:[1000 TO 1010]; a_i asc,b_si desc") + assertQ(req("id_i:[1000 TO 1010]; a_i1 asc,b_si desc") ,"*[count(//doc)=7] " ,"//doc[3]/int[.='100'] " ,"//doc[4]/int[.='50'] " ,"//doc[5]/int[.='1000']" ); - assertQ(req("id_i:[1000 TO 1010]; a_i asc,b_si asc") + assertQ(req("id_i:[1000 TO 1010]; a_i1 asc,b_si asc") ,"*[count(//doc)=7] " ,"//doc[3]/int[.='50'] " ,"//doc[4]/int[.='100'] " diff --git a/solr/src/test/org/apache/solr/JSONTestUtil.java b/solr/src/test/org/apache/solr/JSONTestUtil.java index d8cb897a6d9..8bd5a79c8f3 100644 --- a/solr/src/test/org/apache/solr/JSONTestUtil.java +++ b/solr/src/test/org/apache/solr/JSONTestUtil.java @@ -135,14 +135,16 @@ class CollectionTester { if (!expected.equals(val)) { // make an exception for some numerics - if (expected instanceof Integer && val instanceof Long || expected instanceof Long && val instanceof Integer + if ((expected instanceof Integer && val instanceof Long || expected instanceof Long && val instanceof Integer) && ((Number)expected).longValue() == ((Number)val).longValue()) { - // OK - } else if (expected instanceof Float && val instanceof Double || expected instanceof Double && val instanceof Float - && ((Number)expected).doubleValue() == ((Number)val).doubleValue()) - { - // OK + return true; + } else if ((expected instanceof Float && val instanceof Double || expected instanceof Double && val instanceof Float)) { + double a = ((Number)expected).doubleValue(); + double b = ((Number)val).doubleValue(); + if (Double.compare(a,b) == 0) return true; + if (Math.abs(a-b) < 1e-5) return true; + return false; } else { setErr("mismatch: '" + expected + "'!='" + val + "'"); return false; diff --git a/solr/src/test/org/apache/solr/SolrTestCaseJ4.java b/solr/src/test/org/apache/solr/SolrTestCaseJ4.java index 2906e46c630..ecbe82f4382 100755 --- a/solr/src/test/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/src/test/org/apache/solr/SolrTestCaseJ4.java @@ -22,6 +22,7 @@ package org.apache.solr; import org.apache.lucene.util.LuceneTestCase; import org.apache.noggit.CharArr; import org.apache.noggit.JSONUtil; +import org.apache.noggit.ObjectBuilder; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; @@ -602,6 +603,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { /** Neccessary to make method signatures un-ambiguous */ public static class XmlDoc { public String xml; + @Override public String toString() { return xml; } } @@ -727,6 +729,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { public int order; // the order this document was added to the index + @Override public String toString() { return "Doc("+order+"):"+fields.toString(); } @@ -780,6 +783,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { public static class 
Fld { public FldType ftype; public List vals; + @Override public String toString() { return ftype.fname + "=" + (vals.size()==1 ? vals.get(0).toString() : vals.toString()); } @@ -834,17 +838,9 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { // commit an average of 10 times for large sets, or 10% of the time for small sets int commitOneOutOf = Math.max(nDocs/10, 10); - - // find the max order (docid) and start from there - int order = -1; - for (Doc doc : model.values()) { - order = Math.max(order, doc.order); - } - order++; - for (int i=0; i docList = (List)response; + int order = 0; + for (Map doc : docList) { + Object id = doc.get("id"); + Doc modelDoc = model.get(id); + if (modelDoc == null) continue; // may be some docs in the index that aren't modeled + modelDoc.order = order++; + } + + // make sure we updated the order of all docs in the model + assertEquals(order, model.size()); + return model; } @@ -1052,10 +1067,18 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { static String determineSourceHome() { // ugly, ugly hack to determine the example home without depending on the CWD // this is needed for example/multicore tests which reside outside the classpath - File base = getFile("solr/conf/"); + File base = getFile("solr/conf/").getAbsoluteFile(); while (!new File(base, "solr/CHANGES.txt").exists()) { base = base.getParentFile(); } return new File(base, "solr/").getAbsolutePath(); } + + public static Throwable getRootCause(Throwable t) { + Throwable result = t; + for (Throwable cause = t; null != cause; cause = cause.getCause()) { + result = cause; + } + return result; + } } diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index b04c51c023d..5151564fedd 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -38,7 +38,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { String ndouble = "n_d"; String tdouble = "n_td"; String nlong = "n_l"; - String tlong = "n_tl"; + String tlong = "other_tl1"; String ndate = "n_dt"; String tdate = "n_tdt"; @@ -95,10 +95,11 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { // these queries should be exactly ordered and scores should exactly match query("q","*:*", "sort",i1+" desc"); + query("q","*:*", "sort","{!func}add("+i1+",5)"+" desc"); query("q","*:*", "sort",i1+" asc"); query("q","*:*", "sort",i1+" desc", "fl","*,score"); - query("q","*:*", "sort",tlong+" asc", "fl","score"); // test legacy behavior - "score"=="*,score" - query("q","*:*", "sort",tlong+" desc"); + query("q","*:*", "sort","n_tl1 asc", "fl","score"); // test legacy behavior - "score"=="*,score" + query("q","*:*", "sort","n_tl1 desc"); handle.put("maxScore", SKIPVAL); query("q","{!func}"+i1);// does not expect maxScore. So if it comes ,ignore it. JavaBinCodec.writeSolrDocumentList() //is agnostic of request params. 
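The comment above explains why maxScore is registered as a skipped value: function-query requests do not carry it consistently, so the shard responses and the control response may legitimately differ there. As a hedged sketch of how a BaseDistributedSearchTestCase subclass typically combines such handle entries with query(...) calls (the sort field and the extra skipped key are illustrative assumptions):

import org.apache.solr.BaseDistributedSearchTestCase;

public class ExampleDistributedTest extends BaseDistributedSearchTestCase {
  @Override
  public void doTest() throws Exception {
    // Values that legitimately differ between the control core and the
    // sharded setup are registered so the response comparison ignores them.
    handle.clear();
    handle.put("QTime", SKIPVAL);
    handle.put("maxScore", SKIPVAL);

    // Each query(...) runs against both the control server and the shards,
    // then fails the test if the non-skipped parts of the responses differ.
    query("q", "*:*", "sort", "n_ti1 desc");
    query("q", "*:*", "sort", "{!func}add(n_ti1,5) desc");
  }
}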
diff --git a/solr/src/test/org/apache/solr/TestGroupingSearch.java b/solr/src/test/org/apache/solr/TestGroupingSearch.java index 09ab2a52d2f..f0b53bc32fe 100644 --- a/solr/src/test/org/apache/solr/TestGroupingSearch.java +++ b/solr/src/test/org/apache/solr/TestGroupingSearch.java @@ -30,6 +30,10 @@ import java.util.*; public class TestGroupingSearch extends SolrTestCaseJ4 { + public static final String FOO_STRING_FIELD = "foo_s1"; + public static final String SMALL_STRING_FIELD = "small_s1"; + public static final String SMALL_INT_FIELD = "small_i"; + @BeforeClass public static void beforeTests() throws Exception { initCore("solrconfig.xml","schema12.xml"); @@ -376,9 +380,9 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4))); types.add(new FldType("score_f",ONE_ONE, new FVal(1,100))); // field used to score types.add(new FldType("foo_i",ZERO_ONE, new IRange(0,indexSize))); - types.add(new FldType("foo_s",ZERO_ONE, new SVal('a','z',1,2))); - types.add(new FldType("small_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1))); - types.add(new FldType("small_i",ZERO_ONE, new IRange(0,5+indexSize/10))); + types.add(new FldType(FOO_STRING_FIELD,ZERO_ONE, new SVal('a','z',1,2))); + types.add(new FldType(SMALL_STRING_FIELD,ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1))); + types.add(new FldType(SMALL_INT_FIELD,ZERO_ONE, new IRange(0,5+indexSize/10))); clearIndex(); Map model = indexDocs(types, null, indexSize); @@ -389,36 +393,36 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { clearIndex(); model.clear(); Doc d1 = createDoc(types); - d1.getValues("small_s").set(0,"c"); - d1.getValues("small_i").set(0,5); + d1.getValues(SMALL_STRING_FIELD).set(0,"c"); + d1.getValues(SMALL_INT_FIELD).set(0,5); d1.order = 0; updateJ(toJSON(d1), params("commit","true")); model.put(d1.id, d1); d1 = createDoc(types); - d1.getValues("small_s").set(0,"b"); - d1.getValues("small_i").set(0,5); + d1.getValues(SMALL_STRING_FIELD).set(0,"b"); + d1.getValues(SMALL_INT_FIELD).set(0,5); d1.order = 1; updateJ(toJSON(d1), params("commit","false")); model.put(d1.id, d1); d1 = createDoc(types); - d1.getValues("small_s").set(0,"c"); - d1.getValues("small_i").set(0,5); + d1.getValues(SMALL_STRING_FIELD).set(0,"c"); + d1.getValues(SMALL_INT_FIELD).set(0,5); d1.order = 2; updateJ(toJSON(d1), params("commit","false")); model.put(d1.id, d1); d1 = createDoc(types); - d1.getValues("small_s").set(0,"c"); - d1.getValues("small_i").set(0,5); + d1.getValues(SMALL_STRING_FIELD).set(0,"c"); + d1.getValues(SMALL_INT_FIELD).set(0,5); d1.order = 3; updateJ(toJSON(d1), params("commit","false")); model.put(d1.id, d1); d1 = createDoc(types); - d1.getValues("small_s").set(0,"b"); - d1.getValues("small_i").set(0,2); + d1.getValues(SMALL_STRING_FIELD).set(0,"b"); + d1.getValues(SMALL_INT_FIELD).set(0,2); d1.order = 4; updateJ(toJSON(d1), params("commit","true")); model.put(d1.id, d1); @@ -447,11 +451,11 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // Test specific case if (false) { - groupField="small_i"; - sortComparator=createComparator(Arrays.asList(createComparator("small_s", true, true, false, true))); - sortStr = "small_s asc"; - groupComparator = createComparator(Arrays.asList(createComparator("small_s", true, true, false, false))); - groupSortStr = "small_s asc"; + groupField=SMALL_INT_FIELD; + sortComparator=createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, true))); + sortStr = SMALL_STRING_FIELD + " asc"; + 
groupComparator = createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, false))); + groupSortStr = SMALL_STRING_FIELD + " asc"; rows=1; start=0; group_offset=1; group_limit=1; } @@ -526,8 +530,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { Map resultSet = new LinkedHashMap(); group.put("doclist", resultSet); resultSet.put("numFound", grp.docs.size()); - resultSet.put("start", start); - + resultSet.put("start", group_offset); List docs = new ArrayList(); resultSet.put("docs", docs); for (int j=group_offset; j args = new HashMap(DEFAULT_VERSION_PARAM); - - factory.init(args); - factory.inform(new LinesMockSolrResourceLoader(new ArrayList())); - Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, - new StringReader(StrUtils.join(Arrays.asList(test), ' '))); - TokenStream stream = factory.create(tokenizer); - assertTokenStreamContents(stream, gold); - } - - public void testProtected() throws Exception { - EnglishStemmer stemmer = new EnglishStemmer(); - String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"}; - String[] gold = new String[test.length]; - for (int i = 0; i < test.length; i++) { - if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) { - stemmer.setCurrent(test[i]); - stemmer.stem(); - gold[i] = stemmer.getCurrent(); - } else { - gold[i] = test[i]; - } - } - - EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory(); - Map args = new HashMap(DEFAULT_VERSION_PARAM); - args.put(EnglishPorterFilterFactory.PROTECTED_TOKENS, "who-cares.txt"); - factory.init(args); - List lines = new ArrayList(); - Collections.addAll(lines, "banks", "fledgling"); - factory.inform(new LinesMockSolrResourceLoader(lines)); - Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, - new StringReader(StrUtils.join(Arrays.asList(test), ' '))); - TokenStream stream = factory.create(tokenizer); - assertTokenStreamContents(stream, gold); - } - - class LinesMockSolrResourceLoader implements ResourceLoader { - List lines; - - LinesMockSolrResourceLoader(List lines) { - this.lines = lines; - } - - public List getLines(String resource) throws IOException { - return lines; - } - - public Object newInstance(String cname, String... 
subpackages) { - return null; - } - - public InputStream openResource(String resource) throws IOException { - return null; - } - } -} - diff --git a/solr/src/test/org/apache/solr/analysis/LengthFilterTest.java b/solr/src/test/org/apache/solr/analysis/LengthFilterTest.java index 66ba3a89281..95f5dc1cf25 100644 --- a/solr/src/test/org/apache/solr/analysis/LengthFilterTest.java +++ b/solr/src/test/org/apache/solr/analysis/LengthFilterTest.java @@ -31,9 +31,19 @@ public class LengthFilterTest extends BaseTokenTestCase { Map args = new HashMap(); args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4)); args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10)); + // default: args.put("enablePositionIncrements", "false"); factory.init(args); String test = "foo foobar super-duper-trooper"; TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test))); - assertTokenStreamContents(stream, new String[] { "foobar" }); + assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 }); + + factory = new LengthFilterFactory(); + args = new HashMap(); + args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4)); + args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10)); + args.put("enablePositionIncrements", "true"); + factory.init(args); + stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test))); + assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 }); } } \ No newline at end of file diff --git a/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java b/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java index 5475d3ed857..3d364f7eba1 100644 --- a/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java +++ b/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java @@ -33,7 +33,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.ArrayList; -import java.util.Collections; public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase { @@ -59,37 +58,6 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase { assertTokenStreamContents(stream, gold); } - /** - * Tests the protected words mechanism of EnglishPorterFilterFactory - */ - @Deprecated - public void testProtectedOld() throws Exception { - EnglishStemmer stemmer = new EnglishStemmer(); - String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"}; - String[] gold = new String[test.length]; - for (int i = 0; i < test.length; i++) { - if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) { - stemmer.setCurrent(test[i]); - stemmer.stem(); - gold[i] = stemmer.getCurrent(); - } else { - gold[i] = test[i]; - } - } - - EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory(); - Map args = new HashMap(DEFAULT_VERSION_PARAM); - args.put(SnowballPorterFilterFactory.PROTECTED_TOKENS, "who-cares.txt"); - factory.init(args); - List lines = new ArrayList(); - Collections.addAll(lines, "banks", "fledgling"); - factory.inform(new LinesMockSolrResourceLoader(lines)); - Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, - new StringReader(StrUtils.join(Arrays.asList(test), ' '))); - TokenStream stream = factory.create(tokenizer); - assertTokenStreamContents(stream, gold); - } - class LinesMockSolrResourceLoader implements ResourceLoader { List lines; diff --git 
a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java b/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java deleted file mode 100644 index 6c4baa44e86..00000000000 --- a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.analysis; - -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -import java.io.IOException; -import java.io.StringReader; - -/** - * Test that BufferedTokenStream behaves as advertised in subclasses. - */ -public class TestBufferedTokenStream extends BaseTokenTestCase { - - /** Example of a class implementing the rule "A" "B" => "Q" "B" */ - public static class AB_Q_Stream extends BufferedTokenStream { - public AB_Q_Stream(TokenStream input) {super(input);} - protected Token process(Token t) throws IOException { - if ("A".equals(new String(t.buffer(), 0, t.length()))) { - Token t2 = read(); - if (t2!=null && "B".equals(new String(t2.buffer(), 0, t2.length()))) t.setEmpty().append("Q"); - if (t2!=null) pushBack(t2); - } - return t; - } - } - - /** Example of a class implementing "A" "B" => "A" "A" "B" */ - public static class AB_AAB_Stream extends BufferedTokenStream { - public AB_AAB_Stream(TokenStream input) {super(input);} - protected Token process(Token t) throws IOException { - if ("A".equals(new String(t.buffer(), 0, t.length())) && - "B".equals(new String(peek(1).buffer(), 0, peek(1).length()))) - write((Token)t.clone()); - return t; - } - } - - public void testABQ() throws Exception { - final String input = "How now A B brown A cow B like A B thing?"; - final String expected = "How now Q B brown A cow B like Q B thing?"; - TokenStream ts = new AB_Q_Stream - (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input))); - assertTokenStreamContents(ts, expected.split("\\s")); - } - - public void testABAAB() throws Exception { - final String input = "How now A B brown A cow B like A B thing?"; - final String expected = "How now A A B brown A cow B like A A B thing?"; - TokenStream ts = new AB_AAB_Stream - (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input))); - assertTokenStreamContents(ts, expected.split("\\s")); - } - - public void testReset() throws Exception { - final String input = "How now A B brown A cow B like A B thing?"; - Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); - TokenStream ts = new AB_AAB_Stream(tokenizer); - CharTermAttribute term = 
ts.addAttribute(CharTermAttribute.class); - assertTrue(ts.incrementToken()); - assertEquals("How", term.toString()); - assertTrue(ts.incrementToken()); - assertEquals("now", term.toString()); - assertTrue(ts.incrementToken()); - assertEquals("A", term.toString()); - // reset back to input, - // if reset() does not work correctly then previous buffered tokens will remain - tokenizer.reset(new StringReader(input)); - ts.reset(); - assertTrue(ts.incrementToken()); - assertEquals("How", term.toString()); - } -} diff --git a/solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java new file mode 100644 index 00000000000..7d35701d4c0 --- /dev/null +++ b/solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Galician stem factory is working. + */ +public class TestGalicianStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("cariñosa"); + GalicianStemFilterFactory factory = new GalicianStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "cariñ" }); + } +} diff --git a/solr/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java b/solr/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java index e4f71c57249..f0dd0782567 100644 --- a/solr/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java +++ b/solr/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.solr.analysis; import org.apache.lucene.analysis.core.WhitespaceTokenizer; diff --git a/solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java new file mode 100644 index 00000000000..a8e309a9823 --- /dev/null +++ b/solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java @@ -0,0 +1,36 @@ +package org.apache.solr.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; + +/** + * Simple tests to ensure the Portuguese stem factory is working. + */ +public class TestPortugueseStemFilterFactory extends BaseTokenTestCase { + public void testStemming() throws Exception { + Reader reader = new StringReader("maluquice"); + PortugueseStemFilterFactory factory = new PortugueseStemFilterFactory(); + TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader)); + assertTokenStreamContents(stream, new String[] { "maluc" }); + } +} diff --git a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java index 6eb8a17f2c0..7795fd640f4 100644 --- a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java +++ b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java @@ -48,6 +48,7 @@ public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenTestCase { CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + @Override public boolean incrementToken() { if (toks.hasNext()) { clearAttributes(); diff --git a/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java index 5673ea8676e..184f241cff0 100644 --- a/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java +++ b/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java @@ -52,6 +52,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { initCore("solrconfig.xml","schema-reversed.xml"); } + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java index d9725aafd93..60fc20123e3 100644 --- 
a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java @@ -30,6 +30,7 @@ import org.apache.solr.util.AbstractSolrTestCase; */ abstract public class SolrExampleTestBase extends AbstractSolrTestCase { + @Override public String getSolrHome() { return "../../../example/solr/"; } @Override public String getSchemaFile() { return getSolrHome()+"conf/schema.xml"; } diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index f79a622f06e..071f74e0255 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -576,17 +576,17 @@ abstract public class SolrExampleTests extends SolrJettyTestBase int id = 1; ArrayList docs = new ArrayList(); - docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", true ) ); - docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", false ) ); - docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", true ) ); - docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "b", "inStock", false ) ); - docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "b", "inStock", true ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "a", "inStock", false ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "a", "inStock", true ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", false ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", true ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", false ) ); - docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "aaa", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "aaa", "cat", "a", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "aaa", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "aaa", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "aaa", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "a", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "bbb", "cat", "b", "inStock", true ) ); docs.add( makeTestDoc( "id", id++ ) ); // something not matching server.add( docs ); server.commit(); @@ -610,7 +610,14 @@ abstract public class SolrExampleTests extends SolrJettyTestBase // System.out.println(); // } - // Now make sure they have reasonable stuff + // PIVOT: features,cat + // features=bbb (6) + // cat=b (4) + // cat=a (2) + // features=aaa (5) + // cat=a (3) + // cat=b (2) + List pivot = pivots.getVal( 0 ); assertEquals( "features,cat", pivots.getName( 0 ) ); assertEquals( 2, pivot.size() ); @@ -627,6 +634,15 @@ abstract public class SolrExampleTests extends SolrJettyTestBase assertEquals( "a", counts.get(1).getValue() ); assertEquals( 2, 
counts.get(1).getCount() ); + + // PIVOT: cat,features + // cat=b (6) + // features=bbb (4) + // features=aaa (2) + // cat=a (5) + // features=aaa (3) + // features=bbb (2) + ff = pivot.get( 1 ); assertEquals( "features", ff.getField() ); assertEquals( "aaa", ff.getValue() ); @@ -638,16 +654,32 @@ abstract public class SolrExampleTests extends SolrJettyTestBase assertEquals( "b", counts.get(1).getValue() ); assertEquals( 2, counts.get(1).getCount() ); - // 3 deep + // Three deep: + // PIVOT: features,cat,inStock + // features=bbb (6) + // cat=b (4) + // inStock=false (2) + // inStock=true (2) + // cat=a (2) + // inStock=false (1) + // inStock=true (1) + // features=aaa (5) + // cat=a (3) + // inStock=true (2) + // inStock=false (1) + // cat=b (2) + // inStock=false (1) + // inStock=true (1) + pivot = pivots.getVal( 2 ); assertEquals( "features,cat,inStock", pivots.getName( 2 ) ); assertEquals( 2, pivot.size() ); - PivotField p = pivot.get( 1 ).getPivot().get(0); + PivotField p = pivot.get( 1 ).getPivot().get(0); // get(1) should be features=AAAA, then get(0) should be cat=a assertEquals( "cat", p.getField() ); assertEquals( "a", p.getValue() ); counts = p.getPivot(); // p.write(System.out, 5 ); - assertEquals( 1, counts.size() ); + assertEquals( 2, counts.size() ); // 2 trues and 1 false under features=AAAA,cat=a assertEquals( "inStock", counts.get(0).getField() ); assertEquals( Boolean.TRUE, counts.get(0).getValue() ); assertEquals( 2, counts.get(0).getCount() ); diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrJettyTestBase.java b/solr/src/test/org/apache/solr/client/solrj/SolrJettyTestBase.java index 145317efeae..ad8a70aa299 100755 --- a/solr/src/test/org/apache/solr/client/solrj/SolrJettyTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrJettyTestBase.java @@ -1,5 +1,22 @@ package org.apache.solr.client.solrj; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
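The expected pivot trees spelled out in the comments above (features, then cat, then inStock) are what SolrJ exposes through QueryResponse.getFacetPivot(). A rough sketch of requesting and walking one such tree follows; the SolrServer instance named server is an assumption, and the snippet illustrates the API shape rather than being part of the test.

// Sketch: requesting and walking a facet pivot with SolrJ.
// Uses org.apache.solr.client.solrj.SolrQuery, response.QueryResponse, response.PivotField,
// and org.apache.solr.common.util.NamedList; `server` is an assumed SolrServer.
SolrQuery q = new SolrQuery("*:*");
q.setRows(0);
q.setFacet(true);
q.add("facet.pivot", "features,cat,inStock");          // same pivot spec as the test above
QueryResponse rsp = server.query(q);
NamedList<List<PivotField>> pivots = rsp.getFacetPivot();
for (PivotField top : pivots.getVal(0)) {              // e.g. features=bbb (6), features=aaa (5)
  System.out.println(top.getField() + "=" + top.getValue() + " (" + top.getCount() + ")");
  if (top.getPivot() != null) {
    for (PivotField child : top.getPivot()) {          // nested cat / inStock counts
      System.out.println("  " + child.getField() + "=" + child.getValue() + " (" + child.getCount() + ")");
    }
  }
}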
+ */ + import java.io.File; import java.io.IOException; diff --git a/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java b/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java index 8886a9445db..75f4e341860 100644 --- a/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java +++ b/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java @@ -48,6 +48,7 @@ public class TestLBHttpSolrServer extends LuceneTestCase { SolrInstance[] solr = new SolrInstance[3]; HttpClient httpClient; + @Override public void setUp() throws Exception { super.setUp(); httpClient = new HttpClient(new MultiThreadedHttpConnectionManager()); diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java index 7a6068c7bb4..cce5d3dae04 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java @@ -86,23 +86,23 @@ public class JettyWebappTest extends LuceneTestCase // sure they compile ok String adminPath = "http://localhost:"+port+context+"/"; - String html = IOUtils.toString( new URL(adminPath).openStream() ); - assertNotNull( html ); // real error will be an exception + byte[] bytes = IOUtils.toByteArray( new URL(adminPath).openStream() ); + assertNotNull( bytes ); // real error will be an exception adminPath += "admin/"; - html = IOUtils.toString( new URL(adminPath).openStream() ); - assertNotNull( html ); // real error will be an exception + bytes = IOUtils.toByteArray( new URL(adminPath).openStream() ); + assertNotNull( bytes ); // real error will be an exception // analysis - html = IOUtils.toString( new URL(adminPath+"analysis.jsp").openStream() ); - assertNotNull( html ); // real error will be an exception + bytes = IOUtils.toByteArray( new URL(adminPath+"analysis.jsp").openStream() ); + assertNotNull( bytes ); // real error will be an exception // schema browser - html = IOUtils.toString( new URL(adminPath+"schema.jsp").openStream() ); - assertNotNull( html ); // real error will be an exception + bytes = IOUtils.toByteArray( new URL(adminPath+"schema.jsp").openStream() ); + assertNotNull( bytes ); // real error will be an exception // schema browser - html = IOUtils.toString( new URL(adminPath+"threaddump.jsp").openStream() ); - assertNotNull( html ); // real error will be an exception + bytes = IOUtils.toByteArray( new URL(adminPath+"threaddump.jsp").openStream() ); + assertNotNull( bytes ); // real error will be an exception } } diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java b/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java index 997f1cdcb92..937f8778481 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java @@ -66,6 +66,7 @@ public class TestSolrProperties extends LuceneTestCase { return "solr.xml"; } + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -77,6 +78,7 @@ public class TestSolrProperties extends LuceneTestCase { cores = new CoreContainer(home.getAbsolutePath(), solrXml); } + @Override @After public void tearDown() throws Exception { if (cores != null) diff --git a/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java 
index fbfbafeab8e..5cbf67f0865 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java @@ -48,7 +48,7 @@ public class DocumentAnalysisResponseTest extends LuceneTestCase { DocumentAnalysisResponse response = new DocumentAnalysisResponse() { @Override - protected List buildPhases(NamedList phaseNL) { + protected List buildPhases(NamedList>> phaseNL) { return phases; } }; diff --git a/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java index 73b506dc0cd..f6f62aaa2f9 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java @@ -49,7 +49,7 @@ public class FieldAnalysisResponseTest extends LuceneTestCase { NamedList responseNL = buildResponse(); FieldAnalysisResponse response = new FieldAnalysisResponse() { @Override - protected List buildPhases(NamedList phaseNL) { + protected List buildPhases(NamedList>> phaseNL) { return phases; } }; diff --git a/solr/src/test/org/apache/solr/client/solrj/response/TermsResponseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/TermsResponseTest.java index 16258416ba8..5e0eed5313b 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/TermsResponseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/TermsResponseTest.java @@ -24,6 +24,7 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.TermsResponse.Term; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -36,6 +37,15 @@ public class TermsResponseTest extends SolrJettyTestBase { public static void beforeTest() throws Exception { initCore(EXAMPLE_CONFIG, EXAMPLE_SCHEMA, EXAMPLE_HOME); } + + @Before + @Override + public void setUp() throws Exception{ + super.setUp(); + clearIndex(); + assertU(commit()); + assertU(optimize()); + } @Test public void testTermsResponse() throws Exception { diff --git a/solr/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java b/solr/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java index 053b07732ca..e7ced578ee8 100644 --- a/solr/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java +++ b/solr/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java @@ -1,92 +1,93 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.File; - -import org.apache.solr.BaseDistributedSearchTestCase; -import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.core.SolrConfig; -import org.junit.Before; - -public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearchTestCase { - private static final boolean DEBUG = false; - protected ZkTestServer zkServer; - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - log.info("####SETUP_START " + getName()); - - ignoreException("java.nio.channels.ClosedChannelException"); - - String zkDir = testDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - zkServer = new ZkTestServer(zkDir); - zkServer.run(); - - System.setProperty("zkHost", zkServer.getZkAddress()); - - AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), "solrconfig.xml", "schema.xml"); - - // set some system properties for use by tests - System.setProperty("solr.test.sys.prop1", "propone"); - System.setProperty("solr.test.sys.prop2", "proptwo"); - } - - protected void createServers(int numShards) throws Exception { - System.setProperty("collection", "control_collection"); - controlJetty = createJetty(testDir, testDir + "/control/data", "control_shard"); - System.clearProperty("collection"); - controlClient = createNewSolrServer(controlJetty.getLocalPort()); - - StringBuilder sb = new StringBuilder(); - for (int i = 1; i <= numShards; i++) { - if (sb.length() > 0) sb.append(','); - JettySolrRunner j = createJetty(testDir, testDir + "/jetty" + i, "shard" + (i + 2)); - jettys.add(j); - clients.add(createNewSolrServer(j.getLocalPort())); - sb.append("localhost:").append(j.getLocalPort()).append(context); - } - - shards = sb.toString(); - } - - @Override - public void tearDown() throws Exception { - if (DEBUG) { - printLayout(); - } - zkServer.shutdown(); - System.clearProperty("zkHost"); - System.clearProperty("collection"); - System.clearProperty("solr.test.sys.prop1"); - System.clearProperty("solr.test.sys.prop2"); - super.tearDown(); - resetExceptionIgnores(); - SolrConfig.severeErrors.clear(); - } - - protected void printLayout() throws Exception { - SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT); - zkClient.printLayoutToStdOut(); - zkClient.close(); - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.File; + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.core.SolrConfig; +import org.junit.Before; + +public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearchTestCase { + private static final boolean DEBUG = false; + protected ZkTestServer zkServer; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + log.info("####SETUP_START " + getName()); + + ignoreException("java.nio.channels.ClosedChannelException"); + + String zkDir = testDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + zkServer = new ZkTestServer(zkDir); + zkServer.run(); + + System.setProperty("zkHost", zkServer.getZkAddress()); + + AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), "solrconfig.xml", "schema.xml"); + + // set some system properties for use by tests + System.setProperty("solr.test.sys.prop1", "propone"); + System.setProperty("solr.test.sys.prop2", "proptwo"); + } + + @Override + protected void createServers(int numShards) throws Exception { + System.setProperty("collection", "control_collection"); + controlJetty = createJetty(testDir, testDir + "/control/data", "control_shard"); + System.clearProperty("collection"); + controlClient = createNewSolrServer(controlJetty.getLocalPort()); + + StringBuilder sb = new StringBuilder(); + for (int i = 1; i <= numShards; i++) { + if (sb.length() > 0) sb.append(','); + JettySolrRunner j = createJetty(testDir, testDir + "/jetty" + i, "shard" + (i + 2)); + jettys.add(j); + clients.add(createNewSolrServer(j.getLocalPort())); + sb.append("localhost:").append(j.getLocalPort()).append(context); + } + + shards = sb.toString(); + } + + @Override + public void tearDown() throws Exception { + if (DEBUG) { + printLayout(); + } + zkServer.shutdown(); + System.clearProperty("zkHost"); + System.clearProperty("collection"); + System.clearProperty("solr.test.sys.prop1"); + System.clearProperty("solr.test.sys.prop2"); + super.tearDown(); + resetExceptionIgnores(); + SolrConfig.severeErrors.clear(); + } + + protected void printLayout() throws Exception { + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT); + zkClient.printLayoutToStdOut(); + zkClient.close(); + } +} diff --git a/solr/src/test/org/apache/solr/cloud/AbstractZkTestCase.java b/solr/src/test/org/apache/solr/cloud/AbstractZkTestCase.java index e67436e2b24..f7660f05cf0 100644 --- a/solr/src/test/org/apache/solr/cloud/AbstractZkTestCase.java +++ b/solr/src/test/org/apache/solr/cloud/AbstractZkTestCase.java @@ -1,129 +1,147 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
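Both ZooKeeper test base classes above follow the same embedded-server lifecycle: start a ZkTestServer against a scratch directory, point Solr at it through the zkHost system property, and tear it down afterwards. A stripped-down sketch of that pattern is shown below; the scratch-directory variable and the test body are hypothetical, while ZkTestServer and its methods are the ones used in these classes.

// Sketch of the embedded ZkTestServer lifecycle used by these base classes.
// `testDir` stands for whatever temp directory the test provides (assumption).
String zkDir = testDir.getAbsolutePath() + File.separator + "zookeeper/server1/data";
ZkTestServer zkServer = new ZkTestServer(zkDir);
zkServer.run();                                        // starts an in-process ZooKeeper
System.setProperty("zkHost", zkServer.getZkAddress()); // Solr cores pick this up on startup
try {
  // ... create cores / Jetty instances and run assertions against the cluster ...
} finally {
  zkServer.shutdown();                                 // always stop the server
  System.clearProperty("zkHost");                      // and clear the property for later tests
}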
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.File; -import java.io.IOException; - -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.core.SolrConfig; -import org.apache.zookeeper.CreateMode; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Base test class for ZooKeeper tests. - */ -public abstract class AbstractZkTestCase extends SolrTestCaseJ4 { - - static final int TIMEOUT = 10000; - - private static final boolean DEBUG = false; - - protected static Logger log = LoggerFactory - .getLogger(AbstractZkTestCase.class); - - protected static ZkTestServer zkServer; - - protected static String zkDir; - - - @BeforeClass - public static void azt_beforeClass() throws Exception { - createTempDir(); - zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - zkServer = new ZkTestServer(zkDir); - zkServer.run(); - - System.setProperty("zkHost", zkServer.getZkAddress()); - System.setProperty("hostPort", "0000"); - - buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), - "solrconfig.xml", "schema.xml"); - - initCore("solrconfig.xml", "schema.xml"); - } - - // static to share with distrib test - static void buildZooKeeper(String zkHost, String zkAddress, String config, - String schema) throws Exception { - SolrZkClient zkClient = new SolrZkClient(zkHost, AbstractZkTestCase.TIMEOUT); - zkClient.makePath("/solr"); - zkClient.close(); - - zkClient = new SolrZkClient(zkAddress, AbstractZkTestCase.TIMEOUT); - - ZkNodeProps props = new ZkNodeProps(); - props.put("configName", "conf1"); - zkClient.makePath("/collections/collection1", props.store(), CreateMode.PERSISTENT); - zkClient.makePath("/collections/collection1/shards", CreateMode.PERSISTENT); - - zkClient.makePath("/collections/control_collection", props.store(), CreateMode.PERSISTENT); - zkClient.makePath("/collections/control_collection/shards", CreateMode.PERSISTENT); - - putConfig(zkClient, config); - putConfig(zkClient, schema); - putConfig(zkClient, "stopwords.txt"); - putConfig(zkClient, "protwords.txt"); - putConfig(zkClient, "mapping-ISOLatin1Accent.txt"); - putConfig(zkClient, "old_synonyms.txt"); - putConfig(zkClient, "synonyms.txt"); - - zkClient.close(); - } - - private static void putConfig(SolrZkClient zkConnection, String name) - throws Exception { - zkConnection.setData("/configs/conf1/" + name, getFile("solr" - + File.separator + "conf" + File.separator + name)); - } - - public void tearDown() throws Exception { - if (DEBUG) { - printLayout(zkServer.getZkHost()); - } - - SolrConfig.severeErrors.clear(); - super.tearDown(); - } - - @AfterClass - public static void azt_afterClass() throws IOException { - zkServer.shutdown(); - System.clearProperty("zkHost"); - System.clearProperty("solr.test.sys.prop1"); - System.clearProperty("solr.test.sys.prop2"); - } - - protected void printLayout(String zkHost) throws Exception { - SolrZkClient zkClient = new SolrZkClient(zkHost, AbstractZkTestCase.TIMEOUT); - zkClient.printLayoutToStdOut(); - zkClient.close(); - } - - static void makeSolrZkNode(String zkHost) throws Exception { - SolrZkClient zkClient = new SolrZkClient(zkHost, TIMEOUT); - zkClient.makePath("/solr"); - zkClient.close(); - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.core.SolrConfig; +import org.apache.zookeeper.CreateMode; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base test class for ZooKeeper tests. + */ +public abstract class AbstractZkTestCase extends SolrTestCaseJ4 { + + static final int TIMEOUT = 10000; + + private static final boolean DEBUG = false; + + protected static Logger log = LoggerFactory + .getLogger(AbstractZkTestCase.class); + + protected static ZkTestServer zkServer; + + protected static String zkDir; + + + @BeforeClass + public static void azt_beforeClass() throws Exception { + createTempDir(); + zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + zkServer = new ZkTestServer(zkDir); + zkServer.run(); + + System.setProperty("zkHost", zkServer.getZkAddress()); + System.setProperty("hostPort", "0000"); + + buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), + "solrconfig.xml", "schema.xml"); + + initCore("solrconfig.xml", "schema.xml"); + } + + // static to share with distrib test + static void buildZooKeeper(String zkHost, String zkAddress, String config, + String schema) throws Exception { + SolrZkClient zkClient = new SolrZkClient(zkHost, AbstractZkTestCase.TIMEOUT); + zkClient.makePath("/solr"); + zkClient.close(); + + zkClient = new SolrZkClient(zkAddress, AbstractZkTestCase.TIMEOUT); + + ZkNodeProps props = new ZkNodeProps(); + props.put("configName", "conf1"); + zkClient.makePath("/collections/collection1", props.store(), CreateMode.PERSISTENT); + zkClient.makePath("/collections/collection1/shards", CreateMode.PERSISTENT); + + zkClient.makePath("/collections/control_collection", props.store(), CreateMode.PERSISTENT); + zkClient.makePath("/collections/control_collection/shards", CreateMode.PERSISTENT); + + putConfig(zkClient, config); + putConfig(zkClient, schema); + putConfig(zkClient, "stopwords.txt"); + putConfig(zkClient, "protwords.txt"); + putConfig(zkClient, "mapping-ISOLatin1Accent.txt"); + putConfig(zkClient, "old_synonyms.txt"); + putConfig(zkClient, "synonyms.txt"); + + zkClient.close(); + } + + private static void putConfig(SolrZkClient zkConnection, String name) + throws Exception { + zkConnection.setData("/configs/conf1/" + name, getFile("solr" + + File.separator + "conf" + File.separator + name)); + } + + @Override + public void tearDown() throws Exception { + if (DEBUG) { + printLayout(zkServer.getZkHost()); + } + + SolrConfig.severeErrors.clear(); + super.tearDown(); + } + + @AfterClass + public static void 
azt_afterClass() throws IOException { + zkServer.shutdown(); + System.clearProperty("zkHost"); + System.clearProperty("solr.test.sys.prop1"); + System.clearProperty("solr.test.sys.prop2"); + } + + protected void printLayout(String zkHost) throws Exception { + SolrZkClient zkClient = new SolrZkClient(zkHost, AbstractZkTestCase.TIMEOUT); + zkClient.printLayoutToStdOut(); + zkClient.close(); + } + + static void makeSolrZkNode(String zkHost) throws Exception { + SolrZkClient zkClient = new SolrZkClient(zkHost, TIMEOUT); + zkClient.makePath("/solr"); + zkClient.close(); + } + + static void tryCleanSolrZkNode(String zkHost) throws Exception { + tryCleanPath(zkHost, "/solr"); + } + + static void tryCleanPath(String zkHost, String path) throws Exception { + SolrZkClient zkClient = new SolrZkClient(zkHost, TIMEOUT); + if (zkClient.exists(path)) { + List children = zkClient.getChildren(path, null); + for (String string : children) { + tryCleanPath(zkHost, path+"/"+string); + } + zkClient.delete(path, -1); + } + zkClient.close(); + } +} diff --git a/solr/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index fbcd378cf0e..2452a90f498 100644 --- a/solr/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -1,284 +1,284 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
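The new tryCleanPath helper above deletes a znode subtree bottom-up, because ZooKeeper refuses to delete a node that still has children. The same idea expressed against the plain ZooKeeper client is sketched below; the zk handle is hypothetical and this is not the SolrZkClient wrapper used in the test.

// Sketch: recursive znode delete with the raw ZooKeeper API (org.apache.zookeeper.ZooKeeper).
// Children are removed first; delete(path, -1) ignores the node's version.
void deleteRecursively(ZooKeeper zk, String path) throws Exception {
  if (zk.exists(path, false) == null) {
    return;                                    // nothing to do if the node is absent
  }
  for (String child : zk.getChildren(path, false)) {
    deleteRecursively(zk, path + "/" + child);
  }
  zk.delete(path, -1);
}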
- */ - -import java.net.MalformedURLException; - -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrServer; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.junit.BeforeClass; - -/** - * - */ -public class BasicDistributedZkTest extends AbstractDistributedZkTestCase { - - private static final String DEFAULT_COLLECTION = "collection1"; - private static final boolean DEBUG = false; - String t1="a_t"; - String i1="a_si"; - String nint = "n_i"; - String tint = "n_ti"; - String nfloat = "n_f"; - String tfloat = "n_tf"; - String ndouble = "n_d"; - String tdouble = "n_td"; - String nlong = "n_l"; - String tlong = "n_tl"; - String ndate = "n_dt"; - String tdate = "n_tdt"; - - String oddField="oddField_s"; - String missingField="ignore_exception__missing_but_valid_field_t"; - String invalidField="ignore_exception__invalid_field_not_in_schema"; - - public BasicDistributedZkTest() { - fixShardCount = true; - - System.setProperty("CLOUD_UPDATE_DELAY", "0"); - } - - - @BeforeClass - public static void beforeClass() throws Exception { - System.setProperty("solr.solr.home", SolrTestCaseJ4.TEST_HOME); - } - - @Override - protected void setDistributedParams(ModifiableSolrParams params) { - - if (r.nextBoolean()) { - // don't set shards, let that be figured out from the cloud state - params.set("distrib", "true"); - } else { - // use shard ids rather than physical locations - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < shardCount; i++) { - if (i > 0) - sb.append(','); - sb.append("shard" + (i + 3)); - } - params.set("shards", sb.toString()); - params.set("distrib", "true"); - } - } - - @Override - public void doTest() throws Exception { - del("*:*"); - indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men" - ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d); - indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country." 
- ); - indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow" - ); - indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog" - ); - indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog" - ); - indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall"); - indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall"); - indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men"); - indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again"); - indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass"); - indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind."); - indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance."); - indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out"); - - indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"}); - indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"}); - indexr(id, 16, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"}); - String[] vals = new String[100]; - for (int i=0; i<100; i++) { - vals[i] = "test " + i; - } - indexr(id, 17, "SubjectTerms_mfacet", vals); - - for (int i=100; i<150; i++) { - indexr(id, i); - } - - commit(); - - handle.clear(); - handle.put("QTime", SKIPVAL); - handle.put("timestamp", SKIPVAL); - - // random value sort - for (String f : fieldNames) { - query("q","*:*", "sort",f+" desc"); - query("q","*:*", "sort",f+" asc"); - } - - // these queries should be exactly ordered and scores should exactly match - query("q","*:*", "sort",i1+" desc"); - query("q","*:*", "sort",i1+" asc"); - query("q","*:*", "sort",i1+" desc", "fl","*,score"); - query("q","*:*", "sort",tlong+" asc", "fl","score"); // test legacy behavior - "score"=="*,score" - query("q","*:*", "sort",tlong+" desc"); - handle.put("maxScore", SKIPVAL); - query("q","{!func}"+i1);// does not expect maxScore. So if it comes ,ignore it. JavaBinCodec.writeSolrDocumentList() - //is agnostic of request params. 
- handle.remove("maxScore"); - query("q","{!func}"+i1, "fl","*,score"); // even scores should match exactly here - - handle.put("highlighting", UNORDERED); - handle.put("response", UNORDERED); - - handle.put("maxScore", SKIPVAL); - query("q","quick"); - query("q","all","fl","id","start","0"); - query("q","all","fl","foofoofoo","start","0"); // no fields in returned docs - query("q","all","fl","id","start","100"); - - handle.put("score", SKIPVAL); - query("q","quick","fl","*,score"); - query("q","all","fl","*,score","start","1"); - query("q","all","fl","*,score","start","100"); - - query("q","now their fox sat had put","fl","*,score", - "hl","true","hl.fl",t1); - - query("q","now their fox sat had put","fl","foofoofoo", - "hl","true","hl.fl",t1); - - query("q","matchesnothing","fl","*,score"); - - query("q","*:*", "rows",100, "facet","true", "facet.field",t1); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count"); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index"); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1); - query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*"); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1); - query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.mincount",2); - - // test faceting multiple things at once - query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*" - ,"facet.field",t1); - - // test filter tagging, facet exclusion, and naming (multi-select facet support) - query("q","*:*", "rows",100, "facet","true", "facet.query","{!key=myquick}quick", "facet.query","{!key=myall ex=a}all", "facet.query","*:*" - ,"facet.field","{!key=mykey ex=a}"+t1 - ,"facet.field","{!key=other ex=b}"+t1 - ,"facet.field","{!key=again ex=a,b}"+t1 - ,"facet.field",t1 - ,"fq","{!tag=a}id:[1 TO 7]", "fq","{!tag=b}id:[3 TO 9]" - ); - query("q", "*:*", "facet", "true", "facet.field", "{!ex=t1}SubjectTerms_mfacet", "fq", "{!tag=t1}SubjectTerms_mfacet:(test 1)", "facet.limit", "10", "facet.mincount", "1"); - - // test field that is valid in schema but missing in all shards - query("q","*:*", "rows",100, "facet","true", "facet.field",missingField, "facet.mincount",2); - // test field that is valid in schema and missing in some shards - query("q","*:*", "rows",100, "facet","true", "facet.field",oddField, "facet.mincount",2); - - query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1); - - /*** TODO: the failure may come back in "exception" - try { - // test error produced for field that is invalid for schema - query("q","*:*", "rows",100, "facet","true", "facet.field",invalidField, "facet.mincount",2); - TestCase.fail("SolrServerException expected for invalid field that is not in schema"); - } catch (SolrServerException ex) { - // expected - } - ***/ - - // Try to get better coverage for refinement queries by turning off over requesting. - // This makes it much more likely that we may not get the top facet values and hence - // we turn of that checking. 
- handle.put("facet_fields", SKIPVAL); - query("q","*:*", "rows",0, "facet","true", "facet.field",t1,"facet.limit",5, "facet.shard.limit",5); - // check a complex key name - query("q","*:*", "rows",0, "facet","true", "facet.field","{!key='a b/c \\' \\} foo'}"+t1,"facet.limit",5, "facet.shard.limit",5); - handle.remove("facet_fields"); - - - // index the same document to two servers and make sure things - // don't blow up. - if (clients.size()>=2) { - index(id,100, i1, 107 ,t1,"oh no, a duplicate!"); - for (int i=0; i 0) + sb.append(','); + sb.append("shard" + (i + 3)); + } + params.set("shards", sb.toString()); + params.set("distrib", "true"); + } + } + + @Override + public void doTest() throws Exception { + del("*:*"); + indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men" + ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d); + indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country." + ); + indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow" + ); + indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog" + ); + indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog" + ); + indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall"); + indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall"); + indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men"); + indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again"); + indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass"); + indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind."); + indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance."); + indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out"); + + indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"}); + indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"}); + indexr(id, 16, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"}); + String[] vals = new String[100]; + for (int i=0; i<100; i++) { + vals[i] = "test " + i; + } + indexr(id, 17, "SubjectTerms_mfacet", vals); + + for (int i=100; i<150; i++) { + indexr(id, i); + } + + commit(); + + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + + // random value sort + for (String f : fieldNames) { + query("q","*:*", "sort",f+" desc"); + query("q","*:*", "sort",f+" asc"); + } + + // these queries should be exactly ordered and scores should exactly match + query("q","*:*", "sort",i1+" desc"); + query("q","*:*", "sort",i1+" asc"); + query("q","*:*", "sort",i1+" desc", "fl","*,score"); + query("q","*:*", "sort","n_tl1 asc", "fl","score"); // test legacy behavior - "score"=="*,score" + query("q","*:*", "sort","n_tl1 desc"); + handle.put("maxScore", SKIPVAL); + query("q","{!func}"+i1);// does not expect maxScore. So if it comes ,ignore it. JavaBinCodec.writeSolrDocumentList() + //is agnostic of request params. 
+ handle.remove("maxScore"); + query("q","{!func}"+i1, "fl","*,score"); // even scores should match exactly here + + handle.put("highlighting", UNORDERED); + handle.put("response", UNORDERED); + + handle.put("maxScore", SKIPVAL); + query("q","quick"); + query("q","all","fl","id","start","0"); + query("q","all","fl","foofoofoo","start","0"); // no fields in returned docs + query("q","all","fl","id","start","100"); + + handle.put("score", SKIPVAL); + query("q","quick","fl","*,score"); + query("q","all","fl","*,score","start","1"); + query("q","all","fl","*,score","start","100"); + + query("q","now their fox sat had put","fl","*,score", + "hl","true","hl.fl",t1); + + query("q","now their fox sat had put","fl","foofoofoo", + "hl","true","hl.fl",t1); + + query("q","matchesnothing","fl","*,score"); + + query("q","*:*", "rows",100, "facet","true", "facet.field",t1); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count"); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index"); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1); + query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*"); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.mincount",2); + + // test faceting multiple things at once + query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*" + ,"facet.field",t1); + + // test filter tagging, facet exclusion, and naming (multi-select facet support) + query("q","*:*", "rows",100, "facet","true", "facet.query","{!key=myquick}quick", "facet.query","{!key=myall ex=a}all", "facet.query","*:*" + ,"facet.field","{!key=mykey ex=a}"+t1 + ,"facet.field","{!key=other ex=b}"+t1 + ,"facet.field","{!key=again ex=a,b}"+t1 + ,"facet.field",t1 + ,"fq","{!tag=a}id:[1 TO 7]", "fq","{!tag=b}id:[3 TO 9]" + ); + query("q", "*:*", "facet", "true", "facet.field", "{!ex=t1}SubjectTerms_mfacet", "fq", "{!tag=t1}SubjectTerms_mfacet:(test 1)", "facet.limit", "10", "facet.mincount", "1"); + + // test field that is valid in schema but missing in all shards + query("q","*:*", "rows",100, "facet","true", "facet.field",missingField, "facet.mincount",2); + // test field that is valid in schema and missing in some shards + query("q","*:*", "rows",100, "facet","true", "facet.field",oddField, "facet.mincount",2); + + query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1); + + /*** TODO: the failure may come back in "exception" + try { + // test error produced for field that is invalid for schema + query("q","*:*", "rows",100, "facet","true", "facet.field",invalidField, "facet.mincount",2); + TestCase.fail("SolrServerException expected for invalid field that is not in schema"); + } catch (SolrServerException ex) { + // expected + } + ***/ + + // Try to get better coverage for refinement queries by turning off over requesting. + // This makes it much more likely that we may not get the top facet values and hence + // we turn of that checking. 
+ handle.put("facet_fields", SKIPVAL); + query("q","*:*", "rows",0, "facet","true", "facet.field",t1,"facet.limit",5, "facet.shard.limit",5); + // check a complex key name + query("q","*:*", "rows",0, "facet","true", "facet.field","{!key='a b/c \\' \\} foo'}"+t1,"facet.limit",5, "facet.shard.limit",5); + handle.remove("facet_fields"); + + + // index the same document to two servers and make sure things + // don't blow up. + if (clients.size()>=2) { + index(id,100, i1, 107 ,t1,"oh no, a duplicate!"); + for (int i=0; i slices = null; - for (int i = 75; i > 0; i--) { - cloudState2 = zkController2.getCloudState(); - slices = cloudState2.getSlices("testcore"); - - if (slices != null && slices.containsKey(host + ":1661_solr_testcore")) { - break; - } - Thread.sleep(500); - } - - assertNotNull(slices); - assertTrue(slices.containsKey(host + ":1661_solr_testcore")); - - Slice slice = slices.get(host + ":1661_solr_testcore"); - assertEquals(host + ":1661_solr_testcore", slice.getName()); - - Map shards = slice.getShards(); - - assertEquals(1, shards.size()); - - ZkNodeProps zkProps = shards.get(host + ":1661_solr_testcore"); - - assertNotNull(zkProps); - - assertEquals(host + ":1661_solr", zkProps.get("node_name")); - - assertEquals("http://" + host + ":1661/solr/testcore", zkProps.get("url")); - - Set liveNodes = cloudState2.getLiveNodes(); - assertNotNull(liveNodes); - assertEquals(3, liveNodes.size()); - - container3.shutdown(); - - // slight pause (15s timeout) for watch to trigger - for(int i = 0; i < (5 * 15); i++) { - if(zkController2.getCloudState().getLiveNodes().size() == 2) { - break; - } - Thread.sleep(200); - } - - assertEquals(2, zkController2.getCloudState().getLiveNodes().size()); - - // quickly kill / start client - - container2.getZkController().getZkClient().getSolrZooKeeper().getConnection() - .disconnect(); - container2.shutdown(); - - container2 = init2.initialize(); - - // pause for watch to trigger - for(int i = 0; i < 200; i++) { - if (container1.getZkController().getCloudState().liveNodesContain( - container2.getZkController().getNodeName())) { - break; - } - Thread.sleep(100); - } - - assertTrue(container1.getZkController().getCloudState().liveNodesContain( - container2.getZkController().getNodeName())); - - } - - public void tearDown() throws Exception { - if (VERBOSE) { - printLayout(zkServer.getZkHost()); - } - container1.shutdown(); - container2.shutdown(); - container3.shutdown(); - zkServer.shutdown(); - super.tearDown(); - System.clearProperty("zkClientTimeout"); - System.clearProperty("zkHost"); - System.clearProperty("hostPort"); - System.clearProperty("CLOUD_UPDATE_DELAY"); - SolrConfig.severeErrors.clear(); - } - - private void printLayout(String zkHost) throws Exception { - SolrZkClient zkClient = new SolrZkClient( - zkHost, AbstractZkTestCase.TIMEOUT); - zkClient.printLayoutToStdOut(); - zkClient.close(); - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.util.Map; +import java.util.Set; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.CloudState; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.CoreContainer.Initializer; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrCore; +import org.apache.zookeeper.CreateMode; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * TODO: look at hostPort used below + */ +public class CloudStateUpdateTest extends SolrTestCaseJ4 { + protected static Logger log = LoggerFactory + .getLogger(AbstractZkTestCase.class); + + private static final boolean VERBOSE = false; + + protected ZkTestServer zkServer; + + protected String zkDir; + + private CoreContainer container1; + + private CoreContainer container2; + + private CoreContainer container3; + + private File dataDir1; + + private File dataDir2; + + private File dataDir3; + + private File dataDir4; + + private Initializer init2; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore(); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + + System.setProperty("zkClientTimeout", "3000"); + + zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + zkServer = new ZkTestServer(zkDir); + zkServer.run(); + System.setProperty("zkHost", zkServer.getZkAddress()); + AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer + .getZkAddress(), "solrconfig.xml", "schema.xml"); + + log.info("####SETUP_START " + getName()); + dataDir1 = new File(dataDir + File.separator + "data1"); + dataDir1.mkdirs(); + + dataDir2 = new File(dataDir + File.separator + "data2"); + dataDir2.mkdirs(); + + dataDir3 = new File(dataDir + File.separator + "data3"); + dataDir3.mkdirs(); + + dataDir4 = new File(dataDir + File.separator + "data3"); + dataDir4.mkdirs(); + + // set some system properties for use by tests + System.setProperty("solr.test.sys.prop1", "propone"); + System.setProperty("solr.test.sys.prop2", "proptwo"); + + System.setProperty("hostPort", "1661"); + CoreContainer.Initializer init1 = new CoreContainer.Initializer() { + { + this.dataDir = CloudStateUpdateTest.this.dataDir1.getAbsolutePath(); + } + }; + + container1 = init1.initialize(); + System.clearProperty("hostPort"); + + System.setProperty("hostPort", "1662"); + init2 = new CoreContainer.Initializer() { + { + this.dataDir = CloudStateUpdateTest.this.dataDir2.getAbsolutePath(); + } + }; + + container2 = init2.initialize(); + System.clearProperty("hostPort"); + + System.setProperty("hostPort", "1663"); + CoreContainer.Initializer init3 = new CoreContainer.Initializer() { + { + this.dataDir = CloudStateUpdateTest.this.dataDir3.getAbsolutePath(); + } + }; + container3 = init3.initialize(); + System.clearProperty("hostPort"); + + log.info("####SETUP_END " 
+ getName()); + + } + + @Test + public void testCoreRegistration() throws Exception { + System.setProperty("CLOUD_UPDATE_DELAY", "1"); + + ZkNodeProps props2 = new ZkNodeProps(); + props2.put("configName", "conf1"); + + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT); + zkClient.makePath("/collections/testcore", props2.store(), CreateMode.PERSISTENT); + zkClient.makePath("/collections/testcore/shards", CreateMode.PERSISTENT); + zkClient.close(); + + CoreDescriptor dcore = new CoreDescriptor(container1, "testcore", + "testcore"); + + dcore.setDataDir(dataDir4.getAbsolutePath()); + + SolrCore core = container1.create(dcore); + container1.register(core, false); + + ZkController zkController2 = container2.getZkController(); + + String host = zkController2.getHostName(); + + // slight pause - TODO: takes an oddly long amount of time to schedule tasks + // with almost no delay ... + CloudState cloudState2 = null; + Map slices = null; + for (int i = 75; i > 0; i--) { + cloudState2 = zkController2.getCloudState(); + slices = cloudState2.getSlices("testcore"); + + if (slices != null && slices.containsKey(host + ":1661_solr_testcore")) { + break; + } + Thread.sleep(500); + } + + assertNotNull(slices); + assertTrue(slices.containsKey(host + ":1661_solr_testcore")); + + Slice slice = slices.get(host + ":1661_solr_testcore"); + assertEquals(host + ":1661_solr_testcore", slice.getName()); + + Map shards = slice.getShards(); + + assertEquals(1, shards.size()); + + ZkNodeProps zkProps = shards.get(host + ":1661_solr_testcore"); + + assertNotNull(zkProps); + + assertEquals(host + ":1661_solr", zkProps.get("node_name")); + + assertEquals("http://" + host + ":1661/solr/testcore", zkProps.get("url")); + + Set liveNodes = cloudState2.getLiveNodes(); + assertNotNull(liveNodes); + assertEquals(3, liveNodes.size()); + + container3.shutdown(); + + // slight pause (15s timeout) for watch to trigger + for(int i = 0; i < (5 * 15); i++) { + if(zkController2.getCloudState().getLiveNodes().size() == 2) { + break; + } + Thread.sleep(200); + } + + assertEquals(2, zkController2.getCloudState().getLiveNodes().size()); + + // quickly kill / start client + + container2.getZkController().getZkClient().getSolrZooKeeper().getConnection() + .disconnect(); + container2.shutdown(); + + container2 = init2.initialize(); + + // pause for watch to trigger + for(int i = 0; i < 200; i++) { + if (container1.getZkController().getCloudState().liveNodesContain( + container2.getZkController().getNodeName())) { + break; + } + Thread.sleep(100); + } + + assertTrue(container1.getZkController().getCloudState().liveNodesContain( + container2.getZkController().getNodeName())); + + } + + @Override + public void tearDown() throws Exception { + if (VERBOSE) { + printLayout(zkServer.getZkHost()); + } + container1.shutdown(); + container2.shutdown(); + container3.shutdown(); + zkServer.shutdown(); + super.tearDown(); + System.clearProperty("zkClientTimeout"); + System.clearProperty("zkHost"); + System.clearProperty("hostPort"); + System.clearProperty("CLOUD_UPDATE_DELAY"); + SolrConfig.severeErrors.clear(); + } + + private void printLayout(String zkHost) throws Exception { + SolrZkClient zkClient = new SolrZkClient( + zkHost, AbstractZkTestCase.TIMEOUT); + zkClient.printLayoutToStdOut(); + zkClient.close(); + } +} diff --git a/solr/src/test/org/apache/solr/cloud/ZkControllerTest.java b/solr/src/test/org/apache/solr/cloud/ZkControllerTest.java index 785ea206c90..5b74b0926ac 100644 --- 
a/solr/src/test/org/apache/solr/cloud/ZkControllerTest.java +++ b/solr/src/test/org/apache/solr/cloud/ZkControllerTest.java @@ -1,224 +1,225 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -import java.io.File; -import java.io.IOException; -import java.util.Map; - -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.cloud.CloudState; -import org.apache.solr.common.cloud.Slice; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.common.cloud.ZkStateReader; -import org.apache.solr.core.SolrConfig; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; - -import org.junit.BeforeClass; -import org.junit.Test; - -public class ZkControllerTest extends SolrTestCaseJ4 { - - private static final String TEST_NODE_NAME = "test_node_name"; - - private static final String URL3 = "http://localhost:3133/solr/core1"; - - private static final String URL2 = "http://localhost:3123/solr/core1"; - - private static final String SHARD3 = "localhost:3123_solr_core3"; - - private static final String SHARD2 = "localhost:3123_solr_core2"; - - private static final String SHARD1 = "localhost:3123_solr_core1"; - - private static final String COLLECTION_NAME = "collection1"; - - static final int TIMEOUT = 10000; - - private static final String URL1 = "http://localhost:3133/solr/core0"; - - private static final boolean DEBUG = false; - - @BeforeClass - public static void beforeClass() throws Exception { - initCore(); - } - - @Test - public void testReadShards() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - ZkTestServer server = null; - SolrZkClient zkClient = null; - ZkController zkController = null; - try { - server = new ZkTestServer(zkDir); - server.run(); - - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - - zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); - String shardsPath = "/collections/collection1/shards/shardid1"; - zkClient.makePath(shardsPath); - - addShardToZk(zkClient, shardsPath, SHARD1, URL1); - addShardToZk(zkClient, shardsPath, SHARD2, URL2); - addShardToZk(zkClient, shardsPath, SHARD3, URL3); - - if (DEBUG) { - zkClient.printLayoutToStdOut(); - } - - zkController = new ZkController(server.getZkAddress(), - TIMEOUT, 1000, "localhost", "8983", "solr"); - - zkController.getZkStateReader().updateCloudState(true); - CloudState cloudInfo = zkController.getCloudState(); - Map slices = cloudInfo.getSlices("collection1"); - assertNotNull(slices); - - for (Slice slice : slices.values()) { - Map shards = slice.getShards(); - if (DEBUG) { - for (String shardName : shards.keySet()) { - ZkNodeProps props = 
shards.get(shardName); - System.out.println("shard:" + shardName); - System.out.println("props:" + props.toString()); - } - } - assertNotNull(shards.get(SHARD1)); - assertNotNull(shards.get(SHARD2)); - assertNotNull(shards.get(SHARD3)); - - ZkNodeProps props = shards.get(SHARD1); - assertEquals(URL1, props.get(ZkStateReader.URL_PROP)); - assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); - - props = shards.get(SHARD2); - assertEquals(URL2, props.get(ZkStateReader.URL_PROP)); - assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); - - props = shards.get(SHARD3); - assertEquals(URL3, props.get(ZkStateReader.URL_PROP)); - assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); - - } - - } finally { - if (zkClient != null) { - zkClient.close(); - } - if (zkController != null) { - zkController.close(); - } - if (server != null) { - server.shutdown(); - } - } - } - - @Test - public void testReadConfigName() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - - ZkTestServer server = new ZkTestServer(zkDir); - try { - server.run(); - - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - - SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); - String actualConfigName = "firstConfig"; - - zkClient.makePath(ZkController.CONFIGS_ZKNODE + "/" + actualConfigName); - - ZkNodeProps props = new ZkNodeProps(); - props.put("configName", actualConfigName); - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + COLLECTION_NAME , props.store(), CreateMode.PERSISTENT); - - if (DEBUG) { - zkClient.printLayoutToStdOut(); - } - zkClient.close(); - ZkController zkController = new ZkController(server.getZkAddress(), TIMEOUT, TIMEOUT, - "localhost", "8983", "/solr"); - try { - String configName = zkController.readConfigName(COLLECTION_NAME); - assertEquals(configName, actualConfigName); - } finally { - zkController.close(); - } - } finally { - - server.shutdown(); - } - - } - - @Test - public void testUploadToCloud() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - - ZkTestServer server = new ZkTestServer(zkDir); - ZkController zkController = null; - try { - server.run(); - - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - - zkController = new ZkController(server.getZkAddress(), - TIMEOUT, 1000, "localhost", "8983", "/solr"); - - zkController.uploadToZK(getFile("solr/conf"), - ZkController.CONFIGS_ZKNODE + "/config1"); - - if (DEBUG) { - zkController.printLayoutToStdOut(); - } - - } finally { - if (zkController != null) { - zkController.close(); - } - server.shutdown(); - } - - } - - private void addShardToZk(SolrZkClient zkClient, String shardsPath, - String zkNodeName, String url) throws IOException, - KeeperException, InterruptedException { - - ZkNodeProps props = new ZkNodeProps(); - props.put(ZkStateReader.URL_PROP, url); - props.put(ZkStateReader.NODE_NAME, TEST_NODE_NAME); - byte[] bytes = props.store(); - - zkClient - .create(shardsPath + "/" + zkNodeName, bytes, CreateMode.PERSISTENT); - } - - public void tearDown() throws Exception { - SolrConfig.severeErrors.clear(); - super.tearDown(); - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.Map; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.CloudState; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.core.SolrConfig; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; + +import org.junit.BeforeClass; +import org.junit.Test; + +public class ZkControllerTest extends SolrTestCaseJ4 { + + private static final String TEST_NODE_NAME = "test_node_name"; + + private static final String URL3 = "http://localhost:3133/solr/core1"; + + private static final String URL2 = "http://localhost:3123/solr/core1"; + + private static final String SHARD3 = "localhost:3123_solr_core3"; + + private static final String SHARD2 = "localhost:3123_solr_core2"; + + private static final String SHARD1 = "localhost:3123_solr_core1"; + + private static final String COLLECTION_NAME = "collection1"; + + static final int TIMEOUT = 10000; + + private static final String URL1 = "http://localhost:3133/solr/core0"; + + private static final boolean DEBUG = false; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore(); + } + + @Test + public void testReadShards() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + ZkTestServer server = null; + SolrZkClient zkClient = null; + ZkController zkController = null; + try { + server = new ZkTestServer(zkDir); + server.run(); + AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost()); + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); + String shardsPath = "/collections/collection1/shards/shardid1"; + zkClient.makePath(shardsPath); + + addShardToZk(zkClient, shardsPath, SHARD1, URL1); + addShardToZk(zkClient, shardsPath, SHARD2, URL2); + addShardToZk(zkClient, shardsPath, SHARD3, URL3); + + if (DEBUG) { + zkClient.printLayoutToStdOut(); + } + + zkController = new ZkController(server.getZkAddress(), + TIMEOUT, 1000, "localhost", "8983", "solr"); + + zkController.getZkStateReader().updateCloudState(true); + CloudState cloudInfo = zkController.getCloudState(); + Map slices = cloudInfo.getSlices("collection1"); + assertNotNull(slices); + + for (Slice slice : slices.values()) { + Map shards = slice.getShards(); + if (DEBUG) { + for (String shardName : shards.keySet()) { + ZkNodeProps props = shards.get(shardName); + System.out.println("shard:" + shardName); + System.out.println("props:" + props.toString()); + } + } + assertNotNull(shards.get(SHARD1)); + assertNotNull(shards.get(SHARD2)); + assertNotNull(shards.get(SHARD3)); + + ZkNodeProps props = shards.get(SHARD1); + assertEquals(URL1, props.get(ZkStateReader.URL_PROP)); + 
assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); + + props = shards.get(SHARD2); + assertEquals(URL2, props.get(ZkStateReader.URL_PROP)); + assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); + + props = shards.get(SHARD3); + assertEquals(URL3, props.get(ZkStateReader.URL_PROP)); + assertEquals(TEST_NODE_NAME, props.get(ZkStateReader.NODE_NAME)); + + } + + } finally { + if (zkClient != null) { + zkClient.close(); + } + if (zkController != null) { + zkController.close(); + } + if (server != null) { + server.shutdown(); + } + } + } + + @Test + public void testReadConfigName() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + + ZkTestServer server = new ZkTestServer(zkDir); + try { + server.run(); + + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); + String actualConfigName = "firstConfig"; + + zkClient.makePath(ZkController.CONFIGS_ZKNODE + "/" + actualConfigName); + + ZkNodeProps props = new ZkNodeProps(); + props.put("configName", actualConfigName); + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + COLLECTION_NAME , props.store(), CreateMode.PERSISTENT); + + if (DEBUG) { + zkClient.printLayoutToStdOut(); + } + zkClient.close(); + ZkController zkController = new ZkController(server.getZkAddress(), TIMEOUT, TIMEOUT, + "localhost", "8983", "/solr"); + try { + String configName = zkController.readConfigName(COLLECTION_NAME); + assertEquals(configName, actualConfigName); + } finally { + zkController.close(); + } + } finally { + + server.shutdown(); + } + + } + + @Test + public void testUploadToCloud() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + + ZkTestServer server = new ZkTestServer(zkDir); + ZkController zkController = null; + try { + server.run(); + + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + zkController = new ZkController(server.getZkAddress(), + TIMEOUT, 1000, "localhost", "8983", "/solr"); + + zkController.uploadToZK(getFile("solr/conf"), + ZkController.CONFIGS_ZKNODE + "/config1"); + + if (DEBUG) { + zkController.printLayoutToStdOut(); + } + + } finally { + if (zkController != null) { + zkController.close(); + } + server.shutdown(); + } + + } + + private void addShardToZk(SolrZkClient zkClient, String shardsPath, + String zkNodeName, String url) throws IOException, + KeeperException, InterruptedException { + + ZkNodeProps props = new ZkNodeProps(); + props.put(ZkStateReader.URL_PROP, url); + props.put(ZkStateReader.NODE_NAME, TEST_NODE_NAME); + byte[] bytes = props.store(); + + zkClient + .create(shardsPath + "/" + zkNodeName, bytes, CreateMode.PERSISTENT); + } + + @Override + public void tearDown() throws Exception { + SolrConfig.severeErrors.clear(); + super.tearDown(); + } +} diff --git a/solr/src/test/org/apache/solr/cloud/ZkNodePropsTest.java b/solr/src/test/org/apache/solr/cloud/ZkNodePropsTest.java index e7516e2338f..3da482c4cf7 100644 --- a/solr/src/test/org/apache/solr/cloud/ZkNodePropsTest.java +++ b/solr/src/test/org/apache/solr/cloud/ZkNodePropsTest.java @@ -1,49 +1,49 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. 
The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -import java.io.IOException; - -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.junit.Test; - - -public class ZkNodePropsTest extends SolrTestCaseJ4 { - @Test - public void testBasic() throws IOException { - - ZkNodeProps props = new ZkNodeProps(); - props.put("prop1", "value1"); - props.put("prop2", "value2"); - props.put("prop3", "value3"); - props.put("prop4", "value4"); - props.put("prop5", "value5"); - props.put("prop6", "value6"); - byte[] bytes = props.store(); - - ZkNodeProps props2 = new ZkNodeProps(); - props2.load(bytes); - assertEquals("value1", props2.get("prop1")); - assertEquals("value2", props2.get("prop2")); - assertEquals("value3", props2.get("prop3")); - assertEquals("value4", props2.get("prop4")); - assertEquals("value5", props2.get("prop5")); - assertEquals("value6", props2.get("prop6")); - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +import java.io.IOException; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.junit.Test; + + +public class ZkNodePropsTest extends SolrTestCaseJ4 { + @Test + public void testBasic() throws IOException { + + ZkNodeProps props = new ZkNodeProps(); + props.put("prop1", "value1"); + props.put("prop2", "value2"); + props.put("prop3", "value3"); + props.put("prop4", "value4"); + props.put("prop5", "value5"); + props.put("prop6", "value6"); + byte[] bytes = props.store(); + + ZkNodeProps props2 = new ZkNodeProps(); + props2.load(bytes); + assertEquals("value1", props2.get("prop1")); + assertEquals("value2", props2.get("prop2")); + assertEquals("value3", props2.get("prop3")); + assertEquals("value4", props2.get("prop4")); + assertEquals("value5", props2.get("prop5")); + assertEquals("value6", props2.get("prop6")); + } +} diff --git a/solr/src/test/org/apache/solr/cloud/ZkSolrClientTest.java b/solr/src/test/org/apache/solr/cloud/ZkSolrClientTest.java index c8a0bce9b1b..7358e1987b1 100644 --- a/solr/src/test/org/apache/solr/cloud/ZkSolrClientTest.java +++ b/solr/src/test/org/apache/solr/cloud/ZkSolrClientTest.java @@ -1,240 +1,241 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -import java.io.File; -import java.util.concurrent.atomic.AtomicInteger; - -import junit.framework.TestCase; - -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.core.SolrConfig; -import org.apache.solr.util.AbstractSolrTestCase; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; - -public class ZkSolrClientTest extends AbstractSolrTestCase { - private static final boolean DEBUG = false; - - public void testConnect() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - ZkTestServer server = null; - - server = new ZkTestServer(zkDir); - server.run(); - - SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), 100); - - zkClient.close(); - server.shutdown(); - } - - public void testMakeRootNode() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - ZkTestServer server = null; - - server = new ZkTestServer(zkDir); - server.run(); - - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - - SolrZkClient zkClient = new SolrZkClient(server.getZkHost(), - AbstractZkTestCase.TIMEOUT); - - assertTrue(zkClient.exists("/solr")); - - zkClient.close(); - server.shutdown(); - } - - public void testReconnect() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - ZkTestServer server = null; - SolrZkClient zkClient = null; - try { - server = new ZkTestServer(zkDir); - server.run(); - - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - - zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT); - String shardsPath = "/collections/collection1/shards"; - zkClient.makePath(shardsPath); - - zkClient.makePath("collections/collection1"); - int zkServerPort = server.getPort(); - // this tests disconnect state - server.shutdown(); - - Thread.sleep(80); - - - try { - zkClient.makePath("collections/collection2"); - TestCase.fail("Server should be down here"); - } catch (KeeperException.ConnectionLossException e) { - - } - - // bring server back up - server = new ZkTestServer(zkDir, zkServerPort); - server.run(); - - // TODO: can we do better? 
- // wait for reconnect - Thread.sleep(600); - - try { - zkClient.makePath("collections/collection3"); - } catch (KeeperException.ConnectionLossException e) { - Thread.sleep(5000); // try again in a bit - zkClient.makePath("collections/collection3"); - } - - if (DEBUG) { - zkClient.printLayoutToStdOut(); - } - - assertNotNull(zkClient.exists("/collections/collection3", null)); - assertNotNull(zkClient.exists("/collections/collection1", null)); - - // simulate session expiration - - // one option - long sessionId = zkClient.getSolrZooKeeper().getSessionId(); - server.expire(sessionId); - - // another option - //zkClient.getSolrZooKeeper().getConnection().disconnect(); - - // this tests expired state - - Thread.sleep(1000); // pause for reconnect - - for (int i = 0; i < 8; i++) { - try { - zkClient.makePath("collections/collection4"); - break; - } catch (KeeperException.SessionExpiredException e) { - - } catch (KeeperException.ConnectionLossException e) { - - } - Thread.sleep(1000 * i); - } - - if (DEBUG) { - zkClient.printLayoutToStdOut(); - } - - assertNotNull("Node does not exist, but it should", zkClient.exists("/collections/collection4", null)); - - } finally { - - if (zkClient != null) { - zkClient.close(); - } - if (server != null) { - server.shutdown(); - } - } - } - - public void testWatchChildren() throws Exception { - String zkDir = dataDir.getAbsolutePath() + File.separator - + "zookeeper/server1/data"; - - final AtomicInteger cnt = new AtomicInteger(); - ZkTestServer server = new ZkTestServer(zkDir); - server.run(); - Thread.sleep(400); - AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - final SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT); - try { - zkClient.makePath("/collections"); - - zkClient.getChildren("/collections", new Watcher() { - - public void process(WatchedEvent event) { - if (DEBUG) { - System.out.println("children changed"); - } - cnt.incrementAndGet(); - // remake watch - try { - zkClient.getChildren("/collections", this); - } catch (KeeperException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - }); - - zkClient.makePath("/collections/collection99/shards"); - - zkClient.makePath("collections/collection99/config=collection1"); - - zkClient.makePath("collections/collection99/config=collection3"); - - zkClient.makePath("/collections/collection97/shards"); - - if (DEBUG) { - zkClient.printLayoutToStdOut(); - } - - // pause for the watches to fire - Thread.sleep(700); - - if (cnt.intValue() < 2) { - Thread.sleep(4000); // wait a bit more - } - - assertEquals(2, cnt.intValue()); - - } finally { - - if (zkClient != null) { - zkClient.close(); - } - if (server != null) { - server.shutdown(); - } - } - } - - @Override - public String getSchemaFile() { - return null; - } - - @Override - public String getSolrConfigFile() { - return null; - } - - public void tearDown() throws Exception { - SolrConfig.severeErrors.clear(); - super.tearDown(); - } - -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.File; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase; + +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; + +public class ZkSolrClientTest extends AbstractSolrTestCase { + private static final boolean DEBUG = false; + + public void testConnect() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + ZkTestServer server = null; + + server = new ZkTestServer(zkDir); + server.run(); + + SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), 100); + + zkClient.close(); + server.shutdown(); + } + + public void testMakeRootNode() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + ZkTestServer server = null; + + server = new ZkTestServer(zkDir); + server.run(); + + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + SolrZkClient zkClient = new SolrZkClient(server.getZkHost(), + AbstractZkTestCase.TIMEOUT); + + assertTrue(zkClient.exists("/solr")); + + zkClient.close(); + server.shutdown(); + } + + public void testReconnect() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + ZkTestServer server = null; + SolrZkClient zkClient = null; + try { + server = new ZkTestServer(zkDir); + server.run(); + + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + + zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT); + String shardsPath = "/collections/collection1/shards"; + zkClient.makePath(shardsPath); + + zkClient.makePath("collections/collection1"); + int zkServerPort = server.getPort(); + // this tests disconnect state + server.shutdown(); + + Thread.sleep(80); + + + try { + zkClient.makePath("collections/collection2"); + TestCase.fail("Server should be down here"); + } catch (KeeperException.ConnectionLossException e) { + + } + + // bring server back up + server = new ZkTestServer(zkDir, zkServerPort); + server.run(); + + // TODO: can we do better? 
+ // wait for reconnect + Thread.sleep(600); + + try { + zkClient.makePath("collections/collection3"); + } catch (KeeperException.ConnectionLossException e) { + Thread.sleep(5000); // try again in a bit + zkClient.makePath("collections/collection3"); + } + + if (DEBUG) { + zkClient.printLayoutToStdOut(); + } + + assertNotNull(zkClient.exists("/collections/collection3", null)); + assertNotNull(zkClient.exists("/collections/collection1", null)); + + // simulate session expiration + + // one option + long sessionId = zkClient.getSolrZooKeeper().getSessionId(); + server.expire(sessionId); + + // another option + //zkClient.getSolrZooKeeper().getConnection().disconnect(); + + // this tests expired state + + Thread.sleep(1000); // pause for reconnect + + for (int i = 0; i < 8; i++) { + try { + zkClient.makePath("collections/collection4"); + break; + } catch (KeeperException.SessionExpiredException e) { + + } catch (KeeperException.ConnectionLossException e) { + + } + Thread.sleep(1000 * i); + } + + if (DEBUG) { + zkClient.printLayoutToStdOut(); + } + + assertNotNull("Node does not exist, but it should", zkClient.exists("/collections/collection4", null)); + + } finally { + + if (zkClient != null) { + zkClient.close(); + } + if (server != null) { + server.shutdown(); + } + } + } + + public void testWatchChildren() throws Exception { + String zkDir = dataDir.getAbsolutePath() + File.separator + + "zookeeper/server1/data"; + + final AtomicInteger cnt = new AtomicInteger(); + ZkTestServer server = new ZkTestServer(zkDir); + server.run(); + Thread.sleep(400); + AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); + final SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT); + try { + zkClient.makePath("/collections"); + + zkClient.getChildren("/collections", new Watcher() { + + public void process(WatchedEvent event) { + if (DEBUG) { + System.out.println("children changed"); + } + cnt.incrementAndGet(); + // remake watch + try { + zkClient.getChildren("/collections", this); + } catch (KeeperException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + + zkClient.makePath("/collections/collection99/shards"); + + zkClient.makePath("collections/collection99/config=collection1"); + + zkClient.makePath("collections/collection99/config=collection3"); + + zkClient.makePath("/collections/collection97/shards"); + + if (DEBUG) { + zkClient.printLayoutToStdOut(); + } + + // pause for the watches to fire + Thread.sleep(700); + + if (cnt.intValue() < 2) { + Thread.sleep(4000); // wait a bit more + } + + assertEquals(2, cnt.intValue()); + + } finally { + + if (zkClient != null) { + zkClient.close(); + } + if (server != null) { + server.shutdown(); + } + } + } + + @Override + public String getSchemaFile() { + return null; + } + + @Override + public String getSolrConfigFile() { + return null; + } + + @Override + public void tearDown() throws Exception { + SolrConfig.severeErrors.clear(); + super.tearDown(); + } + +} diff --git a/solr/src/test/org/apache/solr/cloud/ZkTestServer.java b/solr/src/test/org/apache/solr/cloud/ZkTestServer.java index ef1275823cb..e5dcfed39fe 100644 --- a/solr/src/test/org/apache/solr/cloud/ZkTestServer.java +++ b/solr/src/test/org/apache/solr/cloud/ZkTestServer.java @@ -1,319 +1,319 @@ -package org.apache.solr.cloud; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.List; - -import javax.management.JMException; - -import org.apache.solr.SolrTestCaseJ4; -import org.apache.zookeeper.jmx.ManagedUtil; -import org.apache.zookeeper.server.NIOServerCnxn; -import org.apache.zookeeper.server.ServerConfig; -import org.apache.zookeeper.server.ZooKeeperServer; -import org.apache.zookeeper.server.SessionTracker.Session; -import org.apache.zookeeper.server.persistence.FileTxnSnapLog; -import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; - -public class ZkTestServer { - - protected final ZKServerMain zkServer = new ZKServerMain(); - - private String zkDir; - - private int clientPort; - - private Thread zooThread; - - class ZKServerMain { - - private NIOServerCnxn.Factory cnxnFactory; - private ZooKeeperServer zooKeeperServer; - - protected void initializeAndRun(String[] args) throws ConfigException, - IOException { - try { - ManagedUtil.registerLog4jMBeans(); - } catch (JMException e) { - - } - - ServerConfig config = new ServerConfig(); - if (args.length == 1) { - config.parse(args[0]); - } else { - config.parse(args); - } - - runFromConfig(config); - } - - /** - * Run from a ServerConfig. - * - * @param config ServerConfig to use. - * @throws IOException - */ - public void runFromConfig(ServerConfig config) throws IOException { - try { - // Note that this thread isn't going to be doing anything else, - // so rather than spawning another thread, we will just call - // run() in this thread. 
- // create a file logger url from the command line args - zooKeeperServer = new ZooKeeperServer(); - - FileTxnSnapLog ftxn = new FileTxnSnapLog(new File(config - .getDataLogDir()), new File(config.getDataDir())); - zooKeeperServer.setTxnLogFactory(ftxn); - zooKeeperServer.setTickTime(config.getTickTime()); - cnxnFactory = new NIOServerCnxn.Factory(config.getClientPortAddress(), config - .getMaxClientCnxns()); - cnxnFactory.startup(zooKeeperServer); - cnxnFactory.join(); - if (zooKeeperServer.isRunning()) { - zooKeeperServer.shutdown(); - } - } catch (InterruptedException e) { - } - } - - /** - * Shutdown the serving instance - * @throws IOException - */ - protected void shutdown() throws IOException { - zooKeeperServer.shutdown(); - zooKeeperServer.getZKDatabase().close(); - waitForServerDown(getZkHost() + ":" + getPort(), 5000); - cnxnFactory.shutdown(); - } - - public int getLocalPort() { - if (cnxnFactory == null) { - throw new IllegalStateException("A port has not yet been selected"); - } - int port = cnxnFactory.getLocalPort(); - if (port == 0) { - throw new IllegalStateException("A port has not yet been selected"); - } - return port; - } - } - - public ZkTestServer(String zkDir) { - this.zkDir = zkDir; - } - - public ZkTestServer(String zkDir, int port) { - this.zkDir = zkDir; - this.clientPort = port; - } - - public String getZkHost() { - return "127.0.0.1:" + zkServer.getLocalPort(); - } - - public String getZkAddress() { - return "127.0.0.1:" + zkServer.getLocalPort() + "/solr"; - } - - public int getPort() { - return zkServer.getLocalPort(); - } - - public void expire(final long sessionId) { - zkServer.zooKeeperServer.expire(new Session() { - @Override - public long getSessionId() { - return sessionId; - } - @Override - public int getTimeout() { - return 4000; - }}); - } - - public void run() throws InterruptedException { - // we don't call super.setUp - zooThread = new Thread() { - - @Override - public void run() { - ServerConfig config = new ServerConfig() { - - { - setClientPort(ZkTestServer.this.clientPort); - this.dataDir = zkDir; - this.dataLogDir = zkDir; - this.tickTime = 1500; - } - - public void setClientPort(int clientPort) { - if (clientPortAddress != null) { - try { - this.clientPortAddress = new InetSocketAddress( - InetAddress.getByName(clientPortAddress.getHostName()), clientPort); - } catch (UnknownHostException e) { - throw new RuntimeException(e); - } - } else { - this.clientPortAddress = new InetSocketAddress(clientPort); - } - } - }; - - try { - zkServer.runFromConfig(config); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - }; - - zooThread.setDaemon(true); - zooThread.start(); - - int cnt = 0; - int port = -1; - try { - port = getPort(); - } catch(IllegalStateException e) { - - } - while (port < 1) { - Thread.sleep(100); - try { - port = getPort(); - } catch(IllegalStateException e) { - - } - if (cnt == 40) { - throw new RuntimeException("Could not get the port for ZooKeeper server"); - } - cnt++; - } - } - - @SuppressWarnings("deprecation") - public void shutdown() throws IOException { - SolrTestCaseJ4.ignoreException("java.nio.channels.ClosedChannelException"); - // TODO: this can log an exception while trying to unregister a JMX MBean - try { - zkServer.shutdown(); - } finally { - SolrTestCaseJ4.resetExceptionIgnores(); - } - } - - - public static boolean waitForServerDown(String hp, long timeout) { - long start = System.currentTimeMillis(); - while (true) { - try { - HostPort hpobj = parseHostPortList(hp).get(0); - 
send4LetterWord(hpobj.host, hpobj.port, "stat"); - } catch (IOException e) { - return true; - } - - if (System.currentTimeMillis() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } - } - return false; - } - - public static class HostPort { - String host; - int port; - - HostPort(String host, int port) { - this.host = host; - this.port = port; - } - } - - /** - * Send the 4letterword - * @param host the destination host - * @param port the destination port - * @param cmd the 4letterword - * @return - * @throws IOException - */ - public static String send4LetterWord(String host, int port, String cmd) - throws IOException - { - - Socket sock = new Socket(host, port); - BufferedReader reader = null; - try { - OutputStream outstream = sock.getOutputStream(); - outstream.write(cmd.getBytes()); - outstream.flush(); - // this replicates NC - close the output stream before reading - sock.shutdownOutput(); - - reader = - new BufferedReader( - new InputStreamReader(sock.getInputStream())); - StringBuilder sb = new StringBuilder(); - String line; - while((line = reader.readLine()) != null) { - sb.append(line + "\n"); - } - return sb.toString(); - } finally { - sock.close(); - if (reader != null) { - reader.close(); - } - } - } - - public static List parseHostPortList(String hplist) { - ArrayList alist = new ArrayList(); - for (String hp : hplist.split(",")) { - int idx = hp.lastIndexOf(':'); - String host = hp.substring(0, idx); - int port; - try { - port = Integer.parseInt(hp.substring(idx + 1)); - } catch (RuntimeException e) { - throw new RuntimeException("Problem parsing " + hp + e.toString()); - } - alist.add(new HostPort(host, port)); - } - return alist; - } -} +package org.apache.solr.cloud; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; + +import javax.management.JMException; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.zookeeper.jmx.ManagedUtil; +import org.apache.zookeeper.server.NIOServerCnxn; +import org.apache.zookeeper.server.ServerConfig; +import org.apache.zookeeper.server.ZooKeeperServer; +import org.apache.zookeeper.server.SessionTracker.Session; +import org.apache.zookeeper.server.persistence.FileTxnSnapLog; +import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; + +public class ZkTestServer { + + protected final ZKServerMain zkServer = new ZKServerMain(); + + private String zkDir; + + private int clientPort; + + private Thread zooThread; + + class ZKServerMain { + + private NIOServerCnxn.Factory cnxnFactory; + private ZooKeeperServer zooKeeperServer; + + protected void initializeAndRun(String[] args) throws ConfigException, + IOException { + try { + ManagedUtil.registerLog4jMBeans(); + } catch (JMException e) { + + } + + ServerConfig config = new ServerConfig(); + if (args.length == 1) { + config.parse(args[0]); + } else { + config.parse(args); + } + + runFromConfig(config); + } + + /** + * Run from a ServerConfig. + * + * @param config ServerConfig to use. + * @throws IOException + */ + public void runFromConfig(ServerConfig config) throws IOException { + try { + // Note that this thread isn't going to be doing anything else, + // so rather than spawning another thread, we will just call + // run() in this thread. 
+ // create a file logger url from the command line args + zooKeeperServer = new ZooKeeperServer(); + + FileTxnSnapLog ftxn = new FileTxnSnapLog(new File(config + .getDataLogDir()), new File(config.getDataDir())); + zooKeeperServer.setTxnLogFactory(ftxn); + zooKeeperServer.setTickTime(config.getTickTime()); + cnxnFactory = new NIOServerCnxn.Factory(config.getClientPortAddress(), config + .getMaxClientCnxns()); + cnxnFactory.startup(zooKeeperServer); + cnxnFactory.join(); + if (zooKeeperServer.isRunning()) { + zooKeeperServer.shutdown(); + } + } catch (InterruptedException e) { + } + } + + /** + * Shutdown the serving instance + * @throws IOException + */ + protected void shutdown() throws IOException { + zooKeeperServer.shutdown(); + zooKeeperServer.getZKDatabase().close(); + waitForServerDown(getZkHost() + ":" + getPort(), 5000); + cnxnFactory.shutdown(); + } + + public int getLocalPort() { + if (cnxnFactory == null) { + throw new IllegalStateException("A port has not yet been selected"); + } + int port = cnxnFactory.getLocalPort(); + if (port == 0) { + throw new IllegalStateException("A port has not yet been selected"); + } + return port; + } + } + + public ZkTestServer(String zkDir) { + this.zkDir = zkDir; + } + + public ZkTestServer(String zkDir, int port) { + this.zkDir = zkDir; + this.clientPort = port; + } + + public String getZkHost() { + return "127.0.0.1:" + zkServer.getLocalPort(); + } + + public String getZkAddress() { + return "127.0.0.1:" + zkServer.getLocalPort() + "/solr"; + } + + public int getPort() { + return zkServer.getLocalPort(); + } + + public void expire(final long sessionId) { + zkServer.zooKeeperServer.expire(new Session() { + @Override + public long getSessionId() { + return sessionId; + } + @Override + public int getTimeout() { + return 4000; + }}); + } + + public void run() throws InterruptedException { + // we don't call super.setUp + zooThread = new Thread() { + + @Override + public void run() { + ServerConfig config = new ServerConfig() { + + { + setClientPort(ZkTestServer.this.clientPort); + this.dataDir = zkDir; + this.dataLogDir = zkDir; + this.tickTime = 1500; + } + + public void setClientPort(int clientPort) { + if (clientPortAddress != null) { + try { + this.clientPortAddress = new InetSocketAddress( + InetAddress.getByName(clientPortAddress.getHostName()), clientPort); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + } else { + this.clientPortAddress = new InetSocketAddress(clientPort); + } + } + }; + + try { + zkServer.runFromConfig(config); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + }; + + zooThread.setDaemon(true); + zooThread.start(); + + int cnt = 0; + int port = -1; + try { + port = getPort(); + } catch(IllegalStateException e) { + + } + while (port < 1) { + Thread.sleep(100); + try { + port = getPort(); + } catch(IllegalStateException e) { + + } + if (cnt == 40) { + throw new RuntimeException("Could not get the port for ZooKeeper server"); + } + cnt++; + } + } + + @SuppressWarnings("deprecation") + public void shutdown() throws IOException { + SolrTestCaseJ4.ignoreException("java.nio.channels.ClosedChannelException"); + // TODO: this can log an exception while trying to unregister a JMX MBean + try { + zkServer.shutdown(); + } finally { + SolrTestCaseJ4.resetExceptionIgnores(); + } + } + + + public static boolean waitForServerDown(String hp, long timeout) { + long start = System.currentTimeMillis(); + while (true) { + try { + HostPort hpobj = parseHostPortList(hp).get(0); + 
send4LetterWord(hpobj.host, hpobj.port, "stat"); + } catch (IOException e) { + return true; + } + + if (System.currentTimeMillis() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } + } + return false; + } + + public static class HostPort { + String host; + int port; + + HostPort(String host, int port) { + this.host = host; + this.port = port; + } + } + + /** + * Send the 4letterword + * @param host the destination host + * @param port the destination port + * @param cmd the 4letterword + * @return + * @throws IOException + */ + public static String send4LetterWord(String host, int port, String cmd) + throws IOException + { + + Socket sock = new Socket(host, port); + BufferedReader reader = null; + try { + OutputStream outstream = sock.getOutputStream(); + outstream.write(cmd.getBytes("US-ASCII")); + outstream.flush(); + // this replicates NC - close the output stream before reading + sock.shutdownOutput(); + + reader = + new BufferedReader( + new InputStreamReader(sock.getInputStream())); + StringBuilder sb = new StringBuilder(); + String line; + while((line = reader.readLine()) != null) { + sb.append(line + "\n"); + } + return sb.toString(); + } finally { + sock.close(); + if (reader != null) { + reader.close(); + } + } + } + + public static List parseHostPortList(String hplist) { + ArrayList alist = new ArrayList(); + for (String hp : hplist.split(",")) { + int idx = hp.lastIndexOf(':'); + String host = hp.substring(0, idx); + int port; + try { + port = Integer.parseInt(hp.substring(idx + 1)); + } catch (RuntimeException e) { + throw new RuntimeException("Problem parsing " + hp + e.toString()); + } + alist.add(new HostPort(host, port)); + } + return alist; + } +} diff --git a/solr/src/test/org/apache/solr/common/util/ContentStreamTest.java b/solr/src/test/org/apache/solr/common/util/ContentStreamTest.java index 5aa7138d5b7..ec989f8a0c9 100755 --- a/solr/src/test/org/apache/solr/common/util/ContentStreamTest.java +++ b/solr/src/test/org/apache/solr/common/util/ContentStreamTest.java @@ -25,7 +25,9 @@ import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; +import java.net.ConnectException; import java.net.URL; +import java.net.URLConnection; import org.apache.commons.io.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -41,7 +43,7 @@ public class ContentStreamTest extends LuceneTestCase String input = "aads ghaskdgasgldj asl sadg ajdsg &jag # @ hjsakg hsakdg hjkas s"; ContentStreamBase stream = new ContentStreamBase.StringStream( input ); assertEquals( input.length(), stream.getSize().intValue() ); - assertEquals( input, IOUtils.toString( stream.getStream() ) ); + assertEquals( input, IOUtils.toString( stream.getStream(), "UTF-8" ) ); assertEquals( input, IOUtils.toString( stream.getReader() ) ); } @@ -63,33 +65,43 @@ public class ContentStreamTest extends LuceneTestCase public void testURLStream() throws IOException { - String content = null; + byte[] content = null; + String contentType = null; URL url = new URL( "http://svn.apache.org/repos/asf/lucene/dev/trunk/" ); - InputStream in = url.openStream(); + InputStream in = null; try { - content = IOUtils.toString( in ); - } - finally { - IOUtils.closeQuietly(in); + URLConnection conn = url.openConnection(); + in = conn.getInputStream(); + contentType = conn.getContentType(); + content = IOUtils.toByteArray(in); + } catch (ConnectException ex) { + assumeNoException("Unable to connect to " + url + " to run the 
test.", ex); + }finally { + if (in != null) { + IOUtils.closeQuietly(in); + } } - assertTrue( content.length() > 10 ); // found something... + assertTrue( content.length > 10 ); // found something... ContentStreamBase stream = new ContentStreamBase.URLStream( url ); - assertEquals( content.length(), stream.getSize().intValue() ); + assertEquals( content.length, stream.getSize().intValue() ); // Test the stream in = stream.getStream(); try { assertTrue( IOUtils.contentEquals( - new ByteArrayInputStream( content.getBytes() ), in ) ); + new ByteArrayInputStream(content), in ) ); } finally { IOUtils.closeQuietly(in); } + String charset = ContentStreamBase.getCharsetFromContentType(contentType); + if (charset == null) + charset = ContentStreamBase.DEFAULT_CHARSET; // Re-open the stream and this time use a reader stream = new ContentStreamBase.URLStream( url ); - assertTrue( IOUtils.contentEquals( new StringReader( content ), stream.getReader() ) ); + assertTrue( IOUtils.contentEquals( new StringReader(new String(content, charset)), stream.getReader() ) ); } } diff --git a/solr/src/test/org/apache/solr/common/util/DOMUtilTest.java b/solr/src/test/org/apache/solr/common/util/DOMUtilTest.java index 87403355472..069c2f933e1 100644 --- a/solr/src/test/org/apache/solr/common/util/DOMUtilTest.java +++ b/solr/src/test/org/apache/solr/common/util/DOMUtilTest.java @@ -17,7 +17,7 @@ package org.apache.solr.common.util; -import java.io.ByteArrayInputStream; +import java.io.StringReader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -27,6 +27,7 @@ import javax.xml.xpath.XPathFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; +import org.xml.sax.InputSource; import org.apache.lucene.util.LuceneTestCase; @@ -85,6 +86,6 @@ public class DOMUtilTest extends LuceneTestCase { } public Document getDocument( String xml ) throws Exception { - return builder.parse( new ByteArrayInputStream( xml.getBytes() ) ); + return builder.parse(new InputSource(new StringReader(xml))); } } diff --git a/solr/src/test/org/apache/solr/core/AlternateDirectoryTest.java b/solr/src/test/org/apache/solr/core/AlternateDirectoryTest.java index 9f1d46a92b7..b036cb8c35d 100755 --- a/solr/src/test/org/apache/solr/core/AlternateDirectoryTest.java +++ b/solr/src/test/org/apache/solr/core/AlternateDirectoryTest.java @@ -48,6 +48,7 @@ public class AlternateDirectoryTest extends SolrTestCaseJ4 { public static volatile boolean openCalled = false; public static volatile Directory dir; + @Override public Directory open(String path) throws IOException { openCalled = true; // need to close the directory, or otherwise the test fails. 
@@ -63,6 +64,7 @@ public class AlternateDirectoryTest extends SolrTestCaseJ4 { static public class TestIndexReaderFactory extends IndexReaderFactory { static volatile boolean newReaderCalled = false; + @Override public IndexReader newReader(Directory indexDir, boolean readOnly) throws IOException { TestIndexReaderFactory.newReaderCalled = true; diff --git a/solr/src/test/org/apache/solr/core/DummyValueSourceParser.java b/solr/src/test/org/apache/solr/core/DummyValueSourceParser.java index 96f27c8689e..3e13f693450 100644 --- a/solr/src/test/org/apache/solr/core/DummyValueSourceParser.java +++ b/solr/src/test/org/apache/solr/core/DummyValueSourceParser.java @@ -32,17 +32,21 @@ import org.apache.solr.search.function.ValueSource; public class DummyValueSourceParser extends ValueSourceParser { private NamedList args; + @Override public void init(NamedList args) { this.args = args; } + @Override public ValueSource parse(FunctionQParser fp) throws ParseException { ValueSource source = fp.parseValueSource(); ValueSource result = new SimpleFloatFunction(source) { + @Override protected String name() { return "foo"; } + @Override protected float func(int doc, DocValues vals) { float result = 0; return result; diff --git a/solr/src/test/org/apache/solr/core/IndexReaderFactoryTest.java b/solr/src/test/org/apache/solr/core/IndexReaderFactoryTest.java index 870e21d5567..1fbec3fde3b 100644 --- a/solr/src/test/org/apache/solr/core/IndexReaderFactoryTest.java +++ b/solr/src/test/org/apache/solr/core/IndexReaderFactoryTest.java @@ -20,10 +20,12 @@ import org.apache.solr.util.AbstractSolrTestCase; public class IndexReaderFactoryTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig-termindex.xml"; } diff --git a/solr/src/test/org/apache/solr/core/MockQuerySenderListenerReqHandler.java b/solr/src/test/org/apache/solr/core/MockQuerySenderListenerReqHandler.java index d4a27c0ded8..4b351684c23 100644 --- a/solr/src/test/org/apache/solr/core/MockQuerySenderListenerReqHandler.java +++ b/solr/src/test/org/apache/solr/core/MockQuerySenderListenerReqHandler.java @@ -34,36 +34,43 @@ public class MockQuerySenderListenerReqHandler extends RequestHandlerBase { AtomicInteger initCounter = new AtomicInteger(0); + @Override public void init(NamedList args) { initCounter.incrementAndGet(); super.init(args); } + @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { this.req = req; this.rsp = rsp; } + @Override public String getDescription() { String result = null; return result; } + @Override public String getSourceId() { String result = null; return result; } + @Override public String getSource() { String result = null; return result; } + @Override public String getVersion() { String result = null; return result; } + @Override public NamedList getStatistics() { NamedList lst = super.getStatistics(); lst.add("initCount", initCounter.intValue()); diff --git a/solr/src/test/org/apache/solr/core/RAMDirectoryFactoryTest.java b/solr/src/test/org/apache/solr/core/RAMDirectoryFactoryTest.java index 6e480a65556..9ca8583ea87 100644 --- a/solr/src/test/org/apache/solr/core/RAMDirectoryFactoryTest.java +++ b/solr/src/test/org/apache/solr/core/RAMDirectoryFactoryTest.java @@ -18,6 +18,7 @@ package org.apache.solr.core; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.SingleInstanceLockFactory; import org.apache.lucene.util.LuceneTestCase; import 
java.io.IOException; @@ -27,7 +28,7 @@ import java.io.IOException; public class RAMDirectoryFactoryTest extends LuceneTestCase { public void testOpenReturnsTheSameForSamePath() throws IOException { final Directory directory = new RefCntRamDirectory(); - RAMDirectoryFactory factory = new RAMDirectoryFactory() { + RAMDirectoryFactory factory = new RAMDirectoryFactory() { @Override Directory openNew(String path) throws IOException { return directory; @@ -40,6 +41,8 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase { "every time open() is called for the same path", directory, dir1); assertEquals("RAMDirectoryFactory should not create new instance of RefCntRamDirectory " + "every time open() is called for the same path", directory, dir2); + dir1.close(); + dir2.close(); } public void testOpenSucceedForEmptyDir() throws IOException { diff --git a/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java index 60ec7d05be9..59d1afba922 100644 --- a/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java +++ b/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java @@ -42,6 +42,7 @@ import org.xml.sax.SAXException; */ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ + @Override public void setUp() throws Exception { super.setUp(); @@ -58,6 +59,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ ("standard",0,20,"version","2.2"); } + @Override public void tearDown() throws Exception { super.tearDown(); @@ -99,8 +101,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ Directory dir = newFSDirectory(newDir); IndexWriter iw = new IndexWriter( dir, - new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)). 
- setMaxFieldLength(1000) + new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)) ); Document doc = new Document(); doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED)); diff --git a/solr/src/test/org/apache/solr/core/TestBadConfig.java b/solr/src/test/org/apache/solr/core/TestBadConfig.java index 54a742da392..d7ceb8955f4 100644 --- a/solr/src/test/org/apache/solr/core/TestBadConfig.java +++ b/solr/src/test/org/apache/solr/core/TestBadConfig.java @@ -21,9 +21,12 @@ import org.apache.solr.util.AbstractSolrTestCase; public class TestBadConfig extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "bad_solrconfig.xml"; } + @Override public void setUp() throws Exception { ignoreException("unset.sys.property"); try { diff --git a/solr/src/test/org/apache/solr/core/TestConfig.java b/solr/src/test/org/apache/solr/core/TestConfig.java index 03676c881e6..8c4145a610d 100644 --- a/solr/src/test/org/apache/solr/core/TestConfig.java +++ b/solr/src/test/org/apache/solr/core/TestConfig.java @@ -139,7 +139,7 @@ public class TestConfig extends SolrTestCaseJ4 { StandardIndexReaderFactory sirf = (StandardIndexReaderFactory) irf; assertEquals(12, sirf.termInfosIndexDivisor); SolrQueryRequest req = req(); - assertEquals(12, req.getSearcher().getReader().getTermInfosIndexDivisor()); + assertEquals(12, req.getSearcher().getIndexReader().getTermInfosIndexDivisor()); req.close(); } diff --git a/solr/src/test/org/apache/solr/core/TestJmxIntegration.java b/solr/src/test/org/apache/solr/core/TestJmxIntegration.java index 46f42d11fec..3977c4a1e94 100644 --- a/solr/src/test/org/apache/solr/core/TestJmxIntegration.java +++ b/solr/src/test/org/apache/solr/core/TestJmxIntegration.java @@ -46,6 +46,7 @@ public class TestJmxIntegration extends AbstractSolrTestCase { return "solrconfig.xml"; } + @Override @Before public void setUp() throws Exception { // Make sure that at least one MBeanServer is available @@ -53,6 +54,7 @@ public class TestJmxIntegration extends AbstractSolrTestCase { super.setUp(); } + @Override @After public void tearDown() throws Exception { super.tearDown(); diff --git a/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java b/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java index 2356df8a6c3..ff91adeb41b 100644 --- a/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java +++ b/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java @@ -51,6 +51,7 @@ public class TestJmxMonitoredMap extends LuceneTestCase { private JmxMonitoredMap monitoredMap; + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -84,6 +85,7 @@ public class TestJmxMonitoredMap extends LuceneTestCase { } } + @Override @After public void tearDown() throws Exception { try { diff --git a/solr/src/test/org/apache/solr/core/TestLegacyMergeSchedulerPolicyConfig.java b/solr/src/test/org/apache/solr/core/TestLegacyMergeSchedulerPolicyConfig.java index f0bd861aaa2..e89815cecce 100644 --- a/solr/src/test/org/apache/solr/core/TestLegacyMergeSchedulerPolicyConfig.java +++ b/solr/src/test/org/apache/solr/core/TestLegacyMergeSchedulerPolicyConfig.java @@ -1,5 +1,22 @@ package org.apache.solr.core; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import java.io.IOException; import org.apache.lucene.index.IndexWriter; diff --git a/solr/src/test/org/apache/solr/core/TestPropInject.java b/solr/src/test/org/apache/solr/core/TestPropInject.java index c84e13fe877..345feacb8cb 100644 --- a/solr/src/test/org/apache/solr/core/TestPropInject.java +++ b/solr/src/test/org/apache/solr/core/TestPropInject.java @@ -1,5 +1,22 @@ package org.apache.solr.core; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.io.IOException; import org.apache.lucene.index.ConcurrentMergeScheduler; @@ -9,10 +26,12 @@ import org.apache.solr.update.DirectUpdateHandler2; import org.apache.solr.util.AbstractSolrTestCase; public class TestPropInject extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig-propinject.xml"; } diff --git a/solr/src/test/org/apache/solr/core/TestQuerySenderListener.java b/solr/src/test/org/apache/solr/core/TestQuerySenderListener.java index b8edad8e255..70a7501c6e8 100644 --- a/solr/src/test/org/apache/solr/core/TestQuerySenderListener.java +++ b/solr/src/test/org/apache/solr/core/TestQuerySenderListener.java @@ -75,7 +75,7 @@ public class TestQuerySenderListener extends SolrTestCaseJ4 { String evt = mock.req.getParams().get(EventParams.EVENT); assertNotNull("Event is null", evt); assertTrue(evt + " is not equal to " + EventParams.FIRST_SEARCHER, evt.equals(EventParams.FIRST_SEARCHER) == true); - Directory dir = currentSearcher.getReader().directory(); + Directory dir = currentSearcher.getIndexReader().directory(); SolrIndexSearcher newSearcher = new SolrIndexSearcher(core, core.getSchema(), "testQuerySenderListener", dir, true, false); qsl.newSearcher(newSearcher, currentSearcher); diff --git a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java index 2d9cf6c03b1..29f2f5d8cf2 100644 --- a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java +++ b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java @@ -34,6 +34,7 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 { initCore("solrconfig-delpolicy1.xml","schema.xml"); } + @Override @Before public void setUp() throws Exception { super.setUp(); diff --git a/solr/src/test/org/apache/solr/core/TestXIncludeConfig.java b/solr/src/test/org/apache/solr/core/TestXIncludeConfig.java index 95b03bfb327..fbe8d74c86a 100644 --- a/solr/src/test/org/apache/solr/core/TestXIncludeConfig.java +++ b/solr/src/test/org/apache/solr/core/TestXIncludeConfig.java @@ -1,5 +1,22 @@ package org.apache.solr.core; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.io.File; import org.apache.commons.io.FileUtils; @@ -16,11 +33,13 @@ import javax.xml.parsers.DocumentBuilderFactory; public class TestXIncludeConfig extends AbstractSolrTestCase { protected boolean supports; + @Override public String getSchemaFile() { return "schema.xml"; } //public String getSolrConfigFile() { return "solrconfig.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig-xinclude.xml"; } diff --git a/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java index 2454bfba7cd..1753d77cb84 100644 --- a/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java @@ -30,8 +30,12 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.IOException; +import java.io.Reader; /** * A test for {@link DocumentAnalysisRequestHandler}. @@ -71,15 +75,14 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe "" + ""; - final List contentStreams = new ArrayList(1); - contentStreams.add(new ContentStreamBase.StringStream(docsInput)); + final ContentStream cs = new ContentStreamBase.StringStream(docsInput); ModifiableSolrParams params = new ModifiableSolrParams(); params.add("analysis.query", "The Query String"); params.add("analysis.showmatch", "true"); SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) { @Override public Iterable getContentStreams() { - return contentStreams; + return Collections.singleton(cs); } }; @@ -106,6 +109,94 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe req.close(); } + /** A binary-only ContentStream */ + static class ByteStream extends ContentStreamBase { + private final byte[] bytes; + + public ByteStream(byte[] bytes, String contentType) { + this.bytes = bytes; + this.contentType = contentType; + name = null; + size = Long.valueOf(bytes.length); + sourceInfo = "rawBytes"; + } + + public InputStream getStream() throws IOException { + return new ByteArrayInputStream(bytes); + } + + @Override + public Reader getReader() throws IOException { + throw new IOException("This is a byte stream, Readers are not supported."); + } + } + + + // This test should also test charset detection in UpdateRequestHandler, + // but the DocumentAnalysisRequestHandler is simplier to use/check. 
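The two tests added below feed the handler raw bytes, once with the encoding declared only inside the XML declaration and once on the Content-Type header. As a rough sketch of the precedence those tests exercise (a hypothetical helper written for illustration, not code from this patch):

import java.nio.charset.Charset;
import java.util.Locale;

final class ContentTypeCharset {
  /** Returns the charset named in a Content-Type header, or null if none is declared. */
  static Charset fromContentType(String contentType) {
    if (contentType == null) return null;
    for (String part : contentType.split(";")) {
      String trimmed = part.trim();
      if (trimmed.toLowerCase(Locale.ROOT).startsWith("charset=")) {
        return Charset.forName(trimmed.substring("charset=".length()).trim());
      }
    }
    return null; // e.g. plain "application/xml": the loader must sniff the XML declaration instead
  }

  public static void main(String[] args) {
    System.out.println(fromContentType("application/xml; charset=ISO-8859-1")); // ISO-8859-1
    System.out.println(fromContentType("application/xml"));                     // null
  }
}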
+ @Test + public void testCharsetInDocument() throws Exception { + final byte[] xmlBytes = ( + "\r\n" + + "\r\n" + + " \r\n" + + " Müller\r\n" + + " " + + "" + ).getBytes("ISO-8859-1"); + + // we declare a content stream without charset: + final ContentStream cs = new ByteStream(xmlBytes, "application/xml"); + + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) { + @Override + public Iterable getContentStreams() { + return Collections.singleton(cs); + } + }; + + DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req); + assertNotNull(request); + final List documents = request.getDocuments(); + assertNotNull(documents); + assertEquals(1, documents.size()); + SolrInputDocument doc = documents.get(0); + assertEquals("Müller", doc.getField("id").getValue()); + } + + // This test should also test charset detection in UpdateRequestHandler, + // but the DocumentAnalysisRequestHandler is simplier to use/check. + @Test + public void testCharsetOutsideDocument() throws Exception { + final byte[] xmlBytes = ( + "\r\n" + + " \r\n" + + " Müller\r\n" + + " " + + "" + ).getBytes("ISO-8859-1"); + + // we declare a content stream with charset: + final ContentStream cs = new ByteStream(xmlBytes, "application/xml; charset=ISO-8859-1"); + + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) { + @Override + public Iterable getContentStreams() { + return Collections.singleton(cs); + } + }; + + DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req); + assertNotNull(request); + final List documents = request.getDocuments(); + assertNotNull(documents); + assertEquals(1, documents.size()); + SolrInputDocument doc = documents.get(0); + assertEquals("Müller", doc.getField("id").getValue()); + } + /** * Tests the {@link DocumentAnalysisRequestHandler#handleAnalysisRequest(org.apache.solr.client.solrj.request.DocumentAnalysisRequest, * org.apache.solr.schema.IndexSchema)} diff --git a/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java b/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java index c9b280d88d4..e6635475356 100644 --- a/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java +++ b/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java @@ -146,23 +146,28 @@ class BufferingRequestProcessor extends UpdateRequestProcessor super(next); } + @Override public void processAdd(AddUpdateCommand cmd) throws IOException { addCommands.add( cmd ); } + @Override public void processDelete(DeleteUpdateCommand cmd) throws IOException { deleteCommands.add( cmd ); } + @Override public void processCommit(CommitUpdateCommand cmd) throws IOException { commitCommands.add( cmd ); } + @Override public void processRollback(RollbackUpdateCommand cmd) throws IOException { rollbackCommands.add( cmd ); } + @Override public void finish() throws IOException { // nothing? 
} diff --git a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java index 63b1edde582..6dbae21f244 100644 --- a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java @@ -94,7 +94,17 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { assertQ("morelike this - harrison ford",mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); + // test MoreLikeThis debug + params.set(CommonParams.DEBUG_QUERY, "true"); + assertQ("morelike this - harrison ford",mltreq + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']" + ); + // test that qparser plugins work + params.remove(CommonParams.DEBUG_QUERY); params.set(CommonParams.Q, "{!field f=id}44"); assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); @@ -112,9 +122,9 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); - // test that debugging works + // test that debugging works (test for MoreLikeThis*Handler*) params.set(CommonParams.QT, "/mlt"); - params.set("debugQuery", "true"); + params.set(CommonParams.DEBUG_QUERY, "true"); assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']" ,"//lst[@name='debug']/lst[@name='explain']" diff --git a/solr/src/test/org/apache/solr/handler/StandardRequestHandlerTest.java b/solr/src/test/org/apache/solr/handler/StandardRequestHandlerTest.java index e9dd455da55..1cb930f94d6 100644 --- a/solr/src/test/org/apache/solr/handler/StandardRequestHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/StandardRequestHandlerTest.java @@ -43,9 +43,9 @@ public class StandardRequestHandlerTest extends AbstractSolrTestCase { public void testSorting() throws Exception { SolrCore core = h.getCore(); - assertU(adoc("id", "10", "title", "test", "val_s", "aaa")); - assertU(adoc("id", "11", "title", "test", "val_s", "bbb")); - assertU(adoc("id", "12", "title", "test", "val_s", "ccc")); + assertU(adoc("id", "10", "title", "test", "val_s1", "aaa")); + assertU(adoc("id", "11", "title", "test", "val_s1", "bbb")); + assertU(adoc("id", "12", "title", "test", "val_s1", "ccc")); assertU(commit()); Map args = new HashMap(); @@ -58,7 +58,7 @@ public class StandardRequestHandlerTest extends AbstractSolrTestCase { ,"//*[@numFound='3']" ); - args.put( CommonParams.SORT, "val_s asc" ); + args.put( CommonParams.SORT, "val_s1 asc" ); assertQ("with sort param [asc]", req ,"//*[@numFound='3']" ,"//result/doc[1]/int[@name='id'][.='10']" @@ -66,7 +66,7 @@ public class StandardRequestHandlerTest extends AbstractSolrTestCase { ,"//result/doc[3]/int[@name='id'][.='12']" ); - args.put( CommonParams.SORT, "val_s desc" ); + args.put( CommonParams.SORT, "val_s1 desc" ); assertQ("with sort param [desc]", req ,"//*[@numFound='3']" ,"//result/doc[1]/int[@name='id'][.='12']" @@ -84,7 +84,7 @@ public class StandardRequestHandlerTest extends AbstractSolrTestCase { // Using legacy ';' param args.remove( CommonParams.SORT ); args.put( QueryParsing.DEFTYPE, "lucenePlusSort" ); - args.put( CommonParams.Q, "title:test; val_s desc" ); + args.put( CommonParams.Q, "title:test; val_s1 
desc" ); assertQ("with sort param [desc]", req ,"//*[@numFound='3']" ,"//result/doc[1]/int[@name='id'][.='12']" @@ -92,8 +92,8 @@ public class StandardRequestHandlerTest extends AbstractSolrTestCase { ,"//result/doc[3]/int[@name='id'][.='10']" ); - args.put( CommonParams.Q, "title:test; val_s asc" ); - assertQ("with sort param [desc]", req + args.put( CommonParams.Q, "title:test; val_s1 asc" ); + assertQ("with sort param [asc]", req ,"//*[@numFound='3']" ,"//result/doc[1]/int[@name='id'][.='10']" ,"//result/doc[2]/int[@name='id'][.='11']" diff --git a/solr/src/test/org/apache/solr/handler/TestCSVLoader.java b/solr/src/test/org/apache/solr/handler/TestCSVLoader.java index c6415c5b065..37a1f9f9dd4 100755 --- a/solr/src/test/org/apache/solr/handler/TestCSVLoader.java +++ b/solr/src/test/org/apache/solr/handler/TestCSVLoader.java @@ -41,6 +41,7 @@ public class TestCSVLoader extends SolrTestCaseJ4 { String def_charset = "UTF-8"; File file = new File(filename); + @Override @Before public void setUp() throws Exception { // if you override setUp or tearDown, you better call @@ -49,6 +50,7 @@ public class TestCSVLoader extends SolrTestCaseJ4 { cleanup(); } + @Override @After public void tearDown() throws Exception { // if you override setUp or tearDown, you better call @@ -76,7 +78,7 @@ public class TestCSVLoader extends SolrTestCaseJ4 { } void cleanup() { - assertU(delQ("id:[100 TO 110]")); + assertU(delQ("*:*")); assertU(commit()); } diff --git a/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java index a7670bd464b..ca75ff98c9f 100644 --- a/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -25,9 +25,11 @@ import org.apache.lucene.store.SimpleFSDirectory; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.TestDistributedSearch; import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; +import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; @@ -42,6 +44,8 @@ import org.junit.Test; import java.io.*; import java.net.URL; +import java.util.Map; +import java.util.HashMap; /** * Test for ReplicationHandler @@ -53,7 +57,6 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { private static final String CONF_DIR = "." 
+ File.separator + "solr" + File.separator + "conf" + File.separator; - private static final String SLAVE_CONFIG = CONF_DIR + "solrconfig-slave.xml"; static JettySolrRunner masterJetty, slaveJetty; static SolrServer masterClient, slaveClient; @@ -157,6 +160,80 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { return res; } + private NamedList getDetails(SolrServer s) throws Exception { + + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("command","details"); + params.set("qt","/replication"); + QueryRequest req = new QueryRequest(params); + + NamedList res = s.request(req); + + assertNotNull("null response from server", res); + + @SuppressWarnings("unchecked") NamedList details + = (NamedList) res.get("details"); + + assertNotNull("null details", details); + + return details; + } + + @Test + public void testDetails() throws Exception { + { + NamedList details = getDetails(masterClient); + + assertEquals("master isMaster?", + "true", details.get("isMaster")); + assertEquals("master isSlave?", + "false", details.get("isSlave")); + assertNotNull("master has master section", + details.get("master")); + } + { + NamedList details = getDetails(slaveClient); + + assertEquals("slave isMaster?", + "false", details.get("isMaster")); + assertEquals("slave isSlave?", + "true", details.get("isSlave")); + assertNotNull("slave has slave section", + details.get("slave")); + } + + SolrInstance repeater = null; + JettySolrRunner repeaterJetty = null; + SolrServer repeaterClient = null; + try { + repeater = new SolrInstance("repeater", masterJetty.getLocalPort()); + repeater.setUp(); + repeaterJetty = createJetty(repeater); + repeaterClient = createNewSolrServer(repeaterJetty.getLocalPort()); + + + NamedList details = getDetails(repeaterClient); + + assertEquals("repeater isMaster?", + "true", details.get("isMaster")); + assertEquals("repeater isSlave?", + "true", details.get("isSlave")); + assertNotNull("repeater has master section", + details.get("master")); + assertNotNull("repeater has slave section", + details.get("slave")); + + } finally { + try { + if (repeaterJetty != null) repeaterJetty.stop(); + } catch (Exception e) { /* :NOOP: */ } + try { + if (repeater != null) repeater.tearDown(); + } catch (Exception e) { /* :NOOP: */ } + } + } + @Test public void testReplicateAfterWrite2Slave() throws Exception { clearIndexWithReplication(); @@ -250,14 +327,15 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { masterClient.commit(); //change the schema on master - copyFile(getFile(CONF_DIR + "schema-replication2.xml"), new File(master.getConfDir(), "schema.xml")); + master.copyConfigFile(CONF_DIR + "schema-replication2.xml", "schema.xml"); masterJetty.stop(); masterJetty = createJetty(master); masterClient = createNewSolrServer(masterJetty.getLocalPort()); - copyFile(getFile(SLAVE_CONFIG), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + slave.setTestPort(masterJetty.getLocalPort()); + slave.copyConfigFile(slave.getSolrConfigFile(), "solrconfig.xml"); slaveJetty.stop(); slaveJetty = createJetty(slave); @@ -349,7 +427,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { public void testSnapPullWithMasterUrl() throws Exception { //change solrconfig on slave //this has no entry for pollinginterval - copyFile(getFile(CONF_DIR + "solrconfig-slave1.xml"), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + slave.copyConfigFile(CONF_DIR + "solrconfig-slave1.xml", "solrconfig.xml"); 
slaveJetty.stop(); slaveJetty = createJetty(slave); slaveClient = createNewSolrServer(slaveJetty.getLocalPort()); @@ -386,7 +464,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { // NOTE: at this point, the slave is not polling any more // restore it. - copyFile(getFile(CONF_DIR + "solrconfig-slave.xml"), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + slave.copyConfigFile(CONF_DIR + "solrconfig-slave.xml", "solrconfig.xml"); slaveJetty.stop(); slaveJetty = createJetty(slave); slaveClient = createNewSolrServer(slaveJetty.getLocalPort()); @@ -410,15 +488,16 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { assertEquals(nDocs, masterQueryResult.getNumFound()); //change solrconfig having 'replicateAfter startup' option on master - copyFile(getFile(CONF_DIR + "solrconfig-master2.xml"), - new File(master.getConfDir(), "solrconfig.xml")); + master.copyConfigFile(CONF_DIR + "solrconfig-master2.xml", + "solrconfig.xml"); masterJetty.stop(); masterJetty = createJetty(master); masterClient = createNewSolrServer(masterJetty.getLocalPort()); - copyFile(getFile(SLAVE_CONFIG), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + slave.setTestPort(masterJetty.getLocalPort()); + slave.copyConfigFile(slave.getSolrConfigFile(), "solrconfig.xml"); //start slave slaveJetty = createJetty(slave); @@ -435,11 +514,14 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { // NOTE: the master only replicates after startup now! // revert that change. - copyFile(getFile(CONF_DIR + "solrconfig-master.xml"), new File(master.getConfDir(), "solrconfig.xml")); + master.copyConfigFile(CONF_DIR + "solrconfig-master.xml", "solrconfig.xml"); masterJetty.stop(); masterJetty = createJetty(master); masterClient = createNewSolrServer(masterJetty.getLocalPort()); - copyFile(getFile(SLAVE_CONFIG), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + + slave.setTestPort(masterJetty.getLocalPort()); + slave.copyConfigFile(slave.getSolrConfigFile(), "solrconfig.xml"); + //start slave slaveJetty.stop(); slaveJetty = createJetty(slave); @@ -477,20 +559,24 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { masterClient.commit(); //change solrconfig on master - copyFile(getFile(CONF_DIR + "solrconfig-master1.xml"), new File(master.getConfDir(), "solrconfig.xml")); + master.copyConfigFile(CONF_DIR + "solrconfig-master1.xml", + "solrconfig.xml"); //change schema on master - copyFile(getFile(CONF_DIR + "schema-replication2.xml"), new File(master.getConfDir(), "schema.xml")); + master.copyConfigFile(CONF_DIR + "schema-replication2.xml", + "schema.xml"); //keep a copy of the new schema - copyFile(getFile(CONF_DIR + "schema-replication2.xml"), new File(master.getConfDir(), "schema-replication2.xml")); + master.copyConfigFile(CONF_DIR + "schema-replication2.xml", + "schema-replication2.xml"); masterJetty.stop(); masterJetty = createJetty(master); masterClient = createNewSolrServer(masterJetty.getLocalPort()); - copyFile(getFile(SLAVE_CONFIG), new File(slave.getConfDir(), "solrconfig.xml"), masterJetty.getLocalPort()); + slave.setTestPort(masterJetty.getLocalPort()); + slave.copyConfigFile(slave.getSolrConfigFile(), "solrconfig.xml"); slaveJetty.stop(); slaveJetty = createJetty(slave); @@ -520,14 +606,13 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { @Test public void testBackup() throws Exception { - masterJetty.stop(); - copyFile(getFile(CONF_DIR + "solrconfig-master1.xml"), new 
File(master.getConfDir(), "solrconfig.xml")); + master.copyConfigFile(CONF_DIR + "solrconfig-master1.xml", + "solrconfig.xml"); masterJetty = createJetty(master); masterClient = createNewSolrServer(masterJetty.getLocalPort()); - nDocs--; masterClient.deleteByQuery("*:*"); for (int i = 0; i < nDocs; i++) @@ -537,6 +622,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { class BackupThread extends Thread { volatile String fail = null; + @Override public void run() { String masterUrl = "http://localhost:" + masterJetty.getLocalPort() + "/solr/replication?command=" + ReplicationHandler.CMD_BACKUP; URL url; @@ -561,6 +647,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { volatile String fail = null; volatile String response = null; volatile boolean success = false; + @Override public void run() { String masterUrl = "http://localhost:" + masterJetty.getLocalPort() + "/solr/replication?command=" + ReplicationHandler.CMD_DETAILS; URL url; @@ -568,7 +655,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { try { url = new URL(masterUrl); stream = url.openStream(); - response = IOUtils.toString(stream); + response = IOUtils.toString(stream, "UTF-8"); if(response.contains("success")) { success = true; } @@ -620,6 +707,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { assertEquals(nDocs, hits.totalHits); searcher.close(); dir.close(); + AbstractSolrTestCase.recurseDelete(snapDir); // clean up the snap dir } /* character copy of file using UTF-8 */ @@ -646,19 +734,22 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { private static class SolrInstance { - String name; - Integer masterPort; - File homeDir; - File confDir; - File dataDir; + private String name; + private Integer testPort; + private File homeDir; + private File confDir; + private File dataDir; /** - * if masterPort is null, this instance is a master -- otherwise this instance is a slave, and assumes the master is - * on localhost at the specified port. + * @param name used to pick new solr home dir, as well as which + * "solrconfig-${name}.xml" file gets copied + * to solrconfig.xml in new conf dir. + * @param testPort if not null, used as a replacement for + * TEST_PORT in the cloned config files. 
*/ - public SolrInstance(String name, Integer port) { + public SolrInstance(String name, Integer testPort) { this.name = name; - this.masterPort = port; + this.testPort = testPort; } public String getHomeDir() { @@ -678,43 +769,47 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { } public String getSolrConfigFile() { - String fname = ""; - if (null == masterPort) - fname = CONF_DIR + "solrconfig-master.xml"; - else - fname = SLAVE_CONFIG; - return fname; + return CONF_DIR + "solrconfig-"+name+".xml"; + } + + /** If it needs to change */ + public void setTestPort(Integer testPort) { + this.testPort = testPort; } public void setUp() throws Exception { System.setProperty("solr.test.sys.prop1", "propone"); System.setProperty("solr.test.sys.prop2", "proptwo"); - File home = new File(TEMP_DIR, - getClass().getName() + "-" + System.currentTimeMillis()); + File home = new File(TEMP_DIR, + getClass().getName() + "-" + + System.currentTimeMillis()); + - if (null == masterPort) { - homeDir = new File(home, "master"); - dataDir = new File(homeDir, "data"); - confDir = new File(homeDir, "conf"); - } else { - homeDir = new File(home, "slave"); - dataDir = new File(homeDir, "data"); - confDir = new File(homeDir, "conf"); - } + homeDir = new File(home, name); + dataDir = new File(homeDir, "data"); + confDir = new File(homeDir, "conf"); homeDir.mkdirs(); dataDir.mkdirs(); confDir.mkdirs(); File f = new File(confDir, "solrconfig.xml"); - copyFile(getFile(getSolrConfigFile()), f, masterPort); - f = new File(confDir, "schema.xml"); - copyFile(getFile(getSchemaFile()), f); + copyConfigFile(getSolrConfigFile(), "solrconfig.xml"); + copyConfigFile(getSchemaFile(), "schema.xml"); } public void tearDown() throws Exception { AbstractSolrTestCase.recurseDelete(homeDir); } + + public void copyConfigFile(String srcFile, String destFile) + throws IOException { + + copyFile(getFile(srcFile), + new File(confDir, destFile), + testPort); + } + } } diff --git a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java index 75a968d4f7f..ed0edbb97b6 100644 --- a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java @@ -1,5 +1,22 @@ package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.common.params.ModifiableSolrParams; diff --git a/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java index ac3b7094c1e..bcf91c268d3 100644 --- a/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java @@ -1,5 +1,22 @@ package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import org.apache.solr.BaseDistributedSearchTestCase; /** diff --git a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index b165298fa7d..821c838af7c 100644 --- a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -55,6 +55,14 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { clearIndex(); assertU(commit()); assertU(optimize()); + // make sure this component is initialized correctly for each test + QueryElevationComponent comp = (QueryElevationComponent)h.getCore().getSearchComponent("elevate"); + NamedList args = new NamedList(); + args.add( QueryElevationComponent.CONFIG_FILE, "elevate.xml" ); + args.add( QueryElevationComponent.FIELD_TYPE, "string" ); + comp.init( args ); + comp.inform( h.getCore() ); + comp.forceElevation = false; } @Test @@ -71,7 +79,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { comp.inform( core ); SolrQueryRequest req = req(); - IndexReader reader = req.getSearcher().getReader(); + IndexReader reader = req.getSearcher().getIndexReader(); Map map = comp.getElevationMap( reader, core ); req.close(); @@ -112,13 +120,13 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { @Test public void testSorting() throws IOException { - assertU(adoc("id", "a", "title", "ipod", "str_s", "a" )); - assertU(adoc("id", "b", "title", "ipod ipod", "str_s", "b" )); - assertU(adoc("id", "c", "title", "ipod ipod ipod", "str_s", "c" )); + assertU(adoc("id", "a", "title", "ipod", "str_s1", "a" )); + assertU(adoc("id", "b", "title", "ipod ipod", "str_s1", "b" )); + assertU(adoc("id", "c", "title", "ipod ipod ipod", "str_s1", "c" )); - assertU(adoc("id", "x", "title", "boosted", "str_s", "x" )); - assertU(adoc("id", "y", "title", "boosted boosted", "str_s", "y" )); - assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s", "z" )); + assertU(adoc("id", "x", "title", "boosted", 
"str_s1", "x" )); + assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "y" )); + assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s1", "z" )); assertU(commit()); String query = "title:ipod"; @@ -130,7 +138,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { args.put( "indent", "true" ); //args.put( CommonParams.FL, "id,title,score" ); SolrQueryRequest req = new LocalSolrQueryRequest( h.getCore(), new MapSolrParams( args) ); - IndexReader reader = req.getSearcher().getReader(); + IndexReader reader = req.getSearcher().getIndexReader(); QueryElevationComponent booster = (QueryElevationComponent)req.getCore().getSearchComponent( "elevate" ); assertQ("Make sure standard sort works as expected", req @@ -180,7 +188,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { // Try normal sort by 'id' // default 'forceBoost' should be false assertEquals( false, booster.forceElevation ); - args.put( CommonParams.SORT, "str_s asc" ); + args.put( CommonParams.SORT, "str_s1 asc" ); assertQ( null, req ,"//*[@numFound='4']" ,"//result/doc[1]/str[@name='id'][.='a']" @@ -255,7 +263,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { comp.inform( h.getCore() ); SolrQueryRequest req = req(); - IndexReader reader = req.getSearcher().getReader(); + IndexReader reader = req.getSearcher().getIndexReader(); Map map = comp.getElevationMap(reader, h.getCore()); assertTrue( map.get( "aaa" ).priority.containsKey( new BytesRef("A") ) ); assertNull( map.get( "bbb" ) ); @@ -267,7 +275,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { assertU(commit()); req = req(); - reader = req.getSearcher().getReader(); + reader = req.getSearcher().getIndexReader(); map = comp.getElevationMap(reader, h.getCore()); assertNull( map.get( "aaa" ) ); assertTrue( map.get( "bbb" ).priority.containsKey( new BytesRef("B") ) ); diff --git a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java index 66f353ee497..30972f8b881 100644 --- a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java @@ -40,6 +40,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { initCore("solrconfig-spellcheckcomponent.xml","schema.xml"); + } + + @Override + public void setUp() throws Exception { + super.setUp(); assertU(adoc("id", "0", "lowerfilt", "This is a title")); assertU((adoc("id", "1", "lowerfilt", "The quick reb fox jumped over the lazy brown dogs."))); @@ -55,6 +60,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 { assertU((commit())); } + @Override + public void tearDown() throws Exception { + super.tearDown(); + assertU(delQ("*:*")); + optimize(); + assertU((commit())); + + } + @Test public void testExtendedResultsCount() throws Exception { assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_BUILD, "true", "q","bluo", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"false") diff --git a/solr/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/src/test/org/apache/solr/highlight/HighlighterTest.java index dba81809474..46050f9e39b 100755 --- a/solr/src/test/org/apache/solr/highlight/HighlighterTest.java +++ 
b/solr/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -769,4 +769,30 @@ public class HighlighterTest extends SolrTestCaseJ4 { ); } + + public void testSubwordWildcardHighlight() { + assertU(adoc("subword", "lorem PowerShot.com ipsum", "id", "1")); + assertU(commit()); + assertQ("subword wildcard highlighting", + req("q", "subword:pow*", "hl", "true", "hl.fl", "subword"), + "//lst[@name='highlighting']/lst[@name='1']" + + "/arr[@name='subword']/str='lorem PowerShot.com ipsum'"); + } + + public void testSubwordWildcardHighlightWithTermOffsets() { + assertU(adoc("subword_offsets", "lorem PowerShot.com ipsum", "id", "1")); + assertU(commit()); + assertQ("subword wildcard highlighting", + req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"), + "//lst[@name='highlighting']/lst[@name='1']" + + "/arr[@name='subword_offsets']/str='lorem PowerShot.com ipsum'"); + } + public void testSubwordWildcardHighlightWithTermOffsets2() { + assertU(adoc("subword_offsets", "lorem PowerShot ipsum", "id", "1")); + assertU(commit()); + assertQ("subword wildcard highlighting", + req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"), + "//lst[@name='highlighting']/lst[@name='1']" + + "/arr[@name='subword_offsets']/str='lorem PowerShot ipsum'"); + } } diff --git a/solr/src/test/org/apache/solr/request/JSONWriterTest.java b/solr/src/test/org/apache/solr/request/JSONWriterTest.java index 19630e6b86a..9fb07e11882 100644 --- a/solr/src/test/org/apache/solr/request/JSONWriterTest.java +++ b/solr/src/test/org/apache/solr/request/JSONWriterTest.java @@ -65,21 +65,6 @@ public class JSONWriterTest extends SolrTestCaseJ4 { req.close(); } - @Test - public void testPHPS() throws IOException { - SolrQueryRequest req = req("dummy"); - SolrQueryResponse rsp = new SolrQueryResponse(); - QueryResponseWriter w = new PHPSerializedResponseWriter(); - - StringWriter buf = new StringWriter(); - rsp.add("data1", "hello"); - rsp.add("data2", 42); - rsp.add("data3", true); - w.write(buf, req, rsp); - assertEquals(buf.toString(), "a:3:{s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;}"); - req.close(); - } - @Test public void testJSON() throws IOException { SolrQueryRequest req = req("wt","json","json.nl","arrarr"); diff --git a/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java index 7f9b76e671c..b383853b61a 100644 --- a/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java +++ b/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java @@ -169,6 +169,16 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"//lst[@name='trait_s']/int[@name='Pig'][.='1']" ); + // test excluding main query + assertQ(req("q", "{!tag=main}id:43" + ,"facet", "true" + ,"facet.query", "{!key=foo}id:42" + ,"facet.query", "{!ex=main key=bar}id:42" // only matches when we exclude main query + ) + ,"//lst[@name='facet_queries']/int[@name='foo'][.='0']" + ,"//lst[@name='facet_queries']/int[@name='bar'][.='1']" + ); + assertQ("check counts for applied facet queries using filtering (fq)", req("q", "id:[42 TO 47]" ,"facet", "true" diff --git a/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java b/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java index 2d6063d2e4f..703a7e121b5 100644 --- a/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java +++ b/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java @@ -38,10 +38,12 @@ import java.util.UUID; */ public class 
TestBinaryResponseWriter extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema12.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } diff --git a/solr/src/test/org/apache/solr/request/TestFaceting.java b/solr/src/test/org/apache/solr/request/TestFaceting.java index fa5b6cdd1e0..b9e1a5f8a9e 100755 --- a/solr/src/test/org/apache/solr/request/TestFaceting.java +++ b/solr/src/test/org/apache/solr/request/TestFaceting.java @@ -67,14 +67,14 @@ public class TestFaceting extends SolrTestCaseJ4 { req = lrf.makeRequest("q","*:*"); TermIndex ti = new TermIndex(proto.field()); - NumberedTermsEnum te = ti.getEnumerator(req.getSearcher().getReader()); + NumberedTermsEnum te = ti.getEnumerator(req.getSearcher().getIndexReader()); // iterate through first while(te.term() != null) te.next(); assertEquals(size, te.getTermNumber()); te.close(); - te = ti.getEnumerator(req.getSearcher().getReader()); + te = ti.getEnumerator(req.getSearcher().getIndexReader()); Random r = new Random(size); // test seeking by term string diff --git a/solr/src/test/org/apache/solr/request/TestWriterPerf.java b/solr/src/test/org/apache/solr/request/TestWriterPerf.java index e7da448650a..7bc0d774411 100755 --- a/solr/src/test/org/apache/solr/request/TestWriterPerf.java +++ b/solr/src/test/org/apache/solr/request/TestWriterPerf.java @@ -37,15 +37,19 @@ public class TestWriterPerf extends AbstractSolrTestCase { public static final Logger log = LoggerFactory.getLogger(TestWriterPerf.class); + @Override public String getSchemaFile() { return "schema11.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig-functionquery.xml"; } public String getCoreName() { return "basic"; } + @Override public void setUp() throws Exception { // if you override setUp or tearDown, you better call // the super classes version super.setUp(); } + @Override public void tearDown() throws Exception { // if you override setUp or tearDown, you better call // the super classes version diff --git a/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java b/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java new file mode 100644 index 00000000000..d67e1fb8cb8 --- /dev/null +++ b/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.LinkedHashMap; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.response.PHPSerializedResponseWriter; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.QueryResponseWriter; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Basic PHPS tests based on JSONWriterTest + * + */ +public class TestPHPSerializedResponseWriter extends SolrTestCaseJ4 { + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml","schema.xml"); + } + + @Test + public void testSimple() throws IOException { + SolrQueryRequest req = req("dummy"); + SolrQueryResponse rsp = new SolrQueryResponse(); + QueryResponseWriter w = new PHPSerializedResponseWriter(); + + StringWriter buf = new StringWriter(); + rsp.add("data1", "hello"); + rsp.add("data2", 42); + rsp.add("data3", true); + w.write(buf, req, rsp); + assertEquals("a:3:{s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;}", + buf.toString()); + req.close(); + } + + + @Test + public void testSolrDocuments() throws IOException { + SolrQueryRequest req = req("q","*:*"); + SolrQueryResponse rsp = new SolrQueryResponse(); + QueryResponseWriter w = new PHPSerializedResponseWriter(); + StringWriter buf = new StringWriter(); + + SolrDocument d = new SolrDocument(); + + SolrDocument d1 = d; + d.addField("id","1"); + d.addField("data1","hello"); + d.addField("data2",42); + d.addField("data3",true); + + // multivalued fields: + + // extremely odd edge case: value is a map + + // we use LinkedHashMap because we are doing a string comparison + // later and we need predictible ordering + LinkedHashMap nl = new LinkedHashMap(); + nl.put("data4.1", "hashmap"); + nl.put("data4.2", "hello"); + d.addField("data4",nl); + // array value + d.addField("data5",Arrays.asList("data5.1", "data5.2", "data5.3")); + + // adding one more document to test array indexes + d = new SolrDocument(); + SolrDocument d2 = d; + d.addField("id","2"); + + SolrDocumentList sdl = new SolrDocumentList(); + sdl.add(d1); + sdl.add(d2); + rsp.add("response", sdl); + + w.write(buf, req, rsp); + assertEquals("a:1:{s:8:\"response\";a:3:{s:8:\"numFound\";i:0;s:5:\"start\";i:0;s:4:\"docs\";a:2:{i:0;a:6:{s:2:\"id\";s:1:\"1\";s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;s:5:\"data4\";a:2:{s:7:\"data4.1\";s:7:\"hashmap\";s:7:\"data4.2\";s:5:\"hello\";}s:5:\"data5\";a:3:{i:0;s:7:\"data5.1\";i:1;s:7:\"data5.2\";i:2;s:7:\"data5.3\";}}i:1;a:1:{s:2:\"id\";s:1:\"2\";}}}}", + buf.toString()); + req.close(); + } + +} diff --git a/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java b/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java index a770296b44b..03fbaecaec2 100644 --- a/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java +++ b/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java @@ -16,10 +16,11 @@ */ package org.apache.solr.schema; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; public class CustomSimilarityFactory extends SimilarityFactory { - public Similarity getSimilarity() { + @Override + public SimilarityProvider getSimilarityProvider() { return new 
MockConfigurableSimilarity(params.get("echo")); } } diff --git a/solr/src/test/org/apache/solr/schema/DateFieldTest.java b/solr/src/test/org/apache/solr/schema/DateFieldTest.java index 334067f6608..9168d4fe993 100644 --- a/solr/src/test/org/apache/solr/schema/DateFieldTest.java +++ b/solr/src/test/org/apache/solr/schema/DateFieldTest.java @@ -29,6 +29,7 @@ public class DateFieldTest extends LuceneTestCase { protected DateField f = null; protected DateMathParser p = new DateMathParser(UTC, Locale.US); + @Override public void setUp() throws Exception { super.setUp(); f = new DateField(); diff --git a/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java b/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java index cb4b90a1c34..cb176d8dee8 100644 --- a/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java +++ b/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java @@ -27,7 +27,7 @@ import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.SimilarityProvider; import org.junit.BeforeClass; import org.junit.Test; @@ -83,7 +83,7 @@ public class IndexSchemaTest extends SolrTestCaseJ4 { @Test public void testSimilarityFactory() { SolrCore core = h.getCore(); - Similarity similarity = core.getSchema().getSimilarity(); + SimilarityProvider similarity = core.getSchema().getSimilarityProvider(); assertTrue("wrong class", similarity instanceof MockConfigurableSimilarity); assertEquals("is there an echo?", ((MockConfigurableSimilarity)similarity).getPassthrough()); } diff --git a/solr/src/test/org/apache/solr/schema/TestBinaryField.java b/solr/src/test/org/apache/solr/schema/TestBinaryField.java index 7fab9f0c188..72fb0f1225b 100644 --- a/solr/src/test/org/apache/solr/schema/TestBinaryField.java +++ b/solr/src/test/org/apache/solr/schema/TestBinaryField.java @@ -174,6 +174,7 @@ public class TestBinaryField extends LuceneTestCase { } + @Override public void tearDown() throws Exception { jetty.stop(); super.tearDown(); diff --git a/solr/src/test/org/apache/solr/search/FooQParserPlugin.java b/solr/src/test/org/apache/solr/search/FooQParserPlugin.java index 397e1c349ac..b58006fc2a3 100755 --- a/solr/src/test/org/apache/solr/search/FooQParserPlugin.java +++ b/solr/src/test/org/apache/solr/search/FooQParserPlugin.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.Term; public class FooQParserPlugin extends QParserPlugin { + @Override public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { return new FooQParser(qstr, localParams, params, req); } @@ -40,6 +41,7 @@ class FooQParser extends QParser { super(qstr, localParams, params, req); } + @Override public Query parse() throws ParseException { return new TermQuery(new Term(localParams.get(QueryParsing.F), localParams.get(QueryParsing.V))); } diff --git a/solr/src/test/org/apache/solr/search/TestDocSet.java b/solr/src/test/org/apache/solr/search/TestDocSet.java index e52aecbf115..1e8d1377ee8 100644 --- a/solr/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/src/test/org/apache/solr/search/TestDocSet.java @@ -24,8 +24,11 @@ import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSetIterator; +import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.index.IndexReader; 
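The TestDocSet and TestIndexSearcher hunks that continue below replace SolrIndexReader.getLeafReaders() with the ReaderContext API. A minimal sketch of the per-segment iteration pattern, restricted to the calls that actually appear in this patch (getTopReaderContext, ReaderUtil.leaves, context.reader):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.ReaderUtil;

final class LeafWalker {
  // Sums live documents one segment at a time, the same per-leaf walk the
  // updated doFilterTest performs now that top-level filters are unsupported.
  static int sumLiveDocs(IndexReader topLevelReader) {
    ReaderContext top = topLevelReader.getTopReaderContext();
    int total = 0;
    for (AtomicReaderContext leaf : ReaderUtil.leaves(top)) {
      total += leaf.reader.numDocs(); // each leaf wraps a single segment
    }
    return total;
  }
}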
import org.apache.lucene.index.FilterIndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.MultiReader; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; @@ -403,7 +406,8 @@ public class TestDocSet extends LuceneTestCase { } } - public void doFilterTest(SolrIndexReader reader) throws IOException { + public void doFilterTest(IndexReader reader) throws IOException { + ReaderContext topLevelContext = reader.getTopReaderContext(); OpenBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc()+1)); DocSet a = new BitDocSet(bs); DocSet b = getIntDocSet(bs); @@ -411,24 +415,30 @@ public class TestDocSet extends LuceneTestCase { Filter fa = a.getTopFilter(); Filter fb = b.getTopFilter(); + /*** top level filters are no longer supported // test top-level - DocIdSet da = fa.getDocIdSet(reader); - DocIdSet db = fb.getDocIdSet(reader); + DocIdSet da = fa.getDocIdSet(topLevelContext); + DocIdSet db = fb.getDocIdSet(topLevelContext); doTestIteratorEqual(da, db); + ***/ + + DocIdSet da; + DocIdSet db; // first test in-sequence sub readers - for (SolrIndexReader sir : reader.getLeafReaders()) { - da = fa.getDocIdSet(sir); - db = fb.getDocIdSet(sir); + for (AtomicReaderContext readerContext : ReaderUtil.leaves(topLevelContext)) { + da = fa.getDocIdSet(readerContext); + db = fb.getDocIdSet(readerContext); doTestIteratorEqual(da, db); } - int nReaders = reader.getLeafReaders().length; + AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelContext); + int nReaders = leaves.length; // now test out-of-sequence sub readers for (int i=0; i multi - assertEquals(r1.getLeafReaders()[0], r2.getLeafReaders()[0]); + assertEquals(ReaderUtil.leaves(rCtx1)[0].reader, ReaderUtil.leaves(rCtx2)[0].reader); assertU(adoc("id","5", "v_f","3.14159")); assertU(adoc("id","6", "v_f","8983", "v_s","string6")); assertU(commit()); SolrQueryRequest sr3 = req("q","foo"); - SolrIndexReader r3 = sr3.getSearcher().getReader(); + ReaderContext rCtx3 = sr3.getSearcher().getTopReaderContext(); // make sure the readers share segments // assertEquals(r1.getLeafReaders()[0], r3.getLeafReaders()[0]); - assertEquals(r2.getLeafReaders()[0], r3.getLeafReaders()[0]); - assertEquals(r2.getLeafReaders()[1], r3.getLeafReaders()[1]); + assertEquals(ReaderUtil.leaves(rCtx2)[0].reader, ReaderUtil.leaves(rCtx3)[0].reader); + assertEquals(ReaderUtil.leaves(rCtx2)[1].reader, ReaderUtil.leaves(rCtx3)[1].reader); sr1.close(); sr2.close(); // should currently be 1, but this could change depending on future index management - int baseRefCount = r3.getRefCount(); + int baseRefCount = rCtx3.reader.getRefCount(); assertEquals(1, baseRefCount); assertU(commit()); SolrQueryRequest sr4 = req("q","foo"); - SolrIndexReader r4 = sr4.getSearcher().getReader(); + ReaderContext rCtx4 = sr4.getSearcher().getTopReaderContext(); // force an index change so the registered searcher won't be the one we are testing (and // then we should be able to test the refCount going all the way to 0 @@ -97,23 +108,23 @@ public class TestIndexSearcher extends SolrTestCaseJ4 { assertU(commit()); // test that reader didn't change (according to equals at least... 
which uses the wrapped reader) - assertEquals(r3,r4); - assertEquals(baseRefCount+1, r4.getRefCount()); + assertEquals(rCtx3.reader, rCtx4.reader); + assertEquals(baseRefCount+1, rCtx4.reader.getRefCount()); sr3.close(); - assertEquals(baseRefCount, r4.getRefCount()); + assertEquals(baseRefCount, rCtx4.reader.getRefCount()); sr4.close(); - assertEquals(baseRefCount-1, r4.getRefCount()); + assertEquals(baseRefCount-1, rCtx4.reader.getRefCount()); SolrQueryRequest sr5 = req("q","foo"); - SolrIndexReader r5 = sr5.getSearcher().getReader(); + ReaderContext rCtx5 = sr5.getSearcher().getTopReaderContext(); assertU(delI("1")); assertU(commit()); SolrQueryRequest sr6 = req("q","foo"); - SolrIndexReader r6 = sr6.getSearcher().getReader(); - assertEquals(1, r6.getLeafReaders()[0].numDocs()); // only a single doc left in the first segment - assertTrue( !r5.getLeafReaders()[0].equals(r6.getLeafReaders()[0]) ); // readers now different + ReaderContext rCtx6 = sr6.getSearcher().getTopReaderContext(); + assertEquals(1, ReaderUtil.leaves(rCtx6)[0].reader.numDocs()); // only a single doc left in the first segment + assertTrue( !ReaderUtil.leaves(rCtx5)[0].reader.equals(ReaderUtil.leaves(rCtx6)[0].reader) ); // readers now different sr5.close(); sr6.close(); diff --git a/solr/src/test/org/apache/solr/search/TestLRUCache.java b/solr/src/test/org/apache/solr/search/TestLRUCache.java index 7439704f075..7ff5b762085 100644 --- a/solr/src/test/org/apache/solr/search/TestLRUCache.java +++ b/solr/src/test/org/apache/solr/search/TestLRUCache.java @@ -1,5 +1,22 @@ package org.apache.solr.search; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.io.IOException; import java.io.Serializable; import java.util.HashMap; diff --git a/solr/src/test/org/apache/solr/search/TestQueryTypes.java b/solr/src/test/org/apache/solr/search/TestQueryTypes.java index 6f61378bb5e..53f46213aa8 100755 --- a/solr/src/test/org/apache/solr/search/TestQueryTypes.java +++ b/solr/src/test/org/apache/solr/search/TestQueryTypes.java @@ -21,16 +21,20 @@ import org.apache.solr.util.AbstractSolrTestCase; public class TestQueryTypes extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema11.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } public String getCoreName() { return "basic"; } + @Override public void setUp() throws Exception { // if you override setUp or tearDown, you better call // the super classes version super.setUp(); } + @Override public void tearDown() throws Exception { // if you override setUp or tearDown, you better call // the super classes version diff --git a/solr/src/test/org/apache/solr/search/TestQueryUtils.java b/solr/src/test/org/apache/solr/search/TestQueryUtils.java index e31cf4a2e36..3143431cc5e 100755 --- a/solr/src/test/org/apache/solr/search/TestQueryUtils.java +++ b/solr/src/test/org/apache/solr/search/TestQueryUtils.java @@ -31,12 +31,16 @@ import java.util.List; */ public class TestQueryUtils extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } + @Override public void setUp() throws Exception { super.setUp(); } + @Override public void tearDown() throws Exception { super.tearDown(); } diff --git a/solr/src/test/org/apache/solr/search/TestRangeQuery.java b/solr/src/test/org/apache/solr/search/TestRangeQuery.java index dcd920fdfc6..997d3d991b5 100644 --- a/solr/src/test/org/apache/solr/search/TestRangeQuery.java +++ b/solr/src/test/org/apache/solr/search/TestRangeQuery.java @@ -33,6 +33,7 @@ public class TestRangeQuery extends SolrTestCaseJ4 { initCore("solrconfig.xml", "schema11.xml"); } + @Override @Before public void setUp() throws Exception { // if you override setUp or tearDown, you better call diff --git a/solr/src/test/org/apache/solr/search/TestSearchPerf.java b/solr/src/test/org/apache/solr/search/TestSearchPerf.java index dde009e05f5..57e0d86d5f8 100755 --- a/solr/src/test/org/apache/solr/search/TestSearchPerf.java +++ b/solr/src/test/org/apache/solr/search/TestSearchPerf.java @@ -35,12 +35,16 @@ import java.io.IOException; */ public class TestSearchPerf extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema11.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } + @Override public void setUp() throws Exception { super.setUp(); } + @Override public void tearDown() throws Exception { super.tearDown(); } diff --git a/solr/src/test/org/apache/solr/search/TestSort.java b/solr/src/test/org/apache/solr/search/TestSort.java index 23069740acd..b70b9c7e1cf 100755 --- a/solr/src/test/org/apache/solr/search/TestSort.java +++ b/solr/src/test/org/apache/solr/search/TestSort.java @@ -20,7 +20,7 @@ package org.apache.solr.search; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.search.*; @@ -33,7 +33,9 @@ import java.io.IOException; import java.util.*; public class TestSort extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return null; } + @Override public String getSolrConfigFile() { return null; } Random r = random; @@ -49,6 +51,7 @@ public class TestSort extends AbstractSolrTestCase { String val; String val2; + @Override public String toString() { return "{id=" +doc + " val1="+val + " val2="+val2 + "}"; } @@ -63,8 +66,7 @@ public class TestSort extends AbstractSolrTestCase { IndexWriter iw = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)). - setOpenMode(IndexWriterConfig.OpenMode.CREATE). - setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH) + setOpenMode(IndexWriterConfig.OpenMode.CREATE) ); final MyDoc[] mydocs = new MyDoc[ndocs]; @@ -106,8 +108,8 @@ public class TestSort extends AbstractSolrTestCase { for (int i=0; i0931.0442muLti-Default2009-12-12T12:59:46.412Z4.02.01.0342muLti-Default2009-12-12T12:59:46.409Z3.02.01.0242muLti-Default2009-12-12T12:59:46.406Z2.02.01.0142muLti-Default2009-12-12T12:59:46.361Z0.02.0 -*/ \ No newline at end of file +*/ diff --git a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java index 4cfebb79c94..300acd34b8a 100755 --- a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -17,6 +17,7 @@ package org.apache.solr.search.function; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Similarity; @@ -294,8 +295,11 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { "//float[@name='score']='" + similarity.idf(3,6) + "'"); assertQ(req("fl","*,score","q", "{!func}tf(a_t,cow)", "fq","id:6"), "//float[@name='score']='" + similarity.tf(5) + "'"); + FieldInvertState state = new FieldInvertState(); + state.setBoost(1.0f); + state.setLength(4); assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"), - "//float[@name='score']='" + similarity.lengthNorm("a_t",4) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte + "//float[@name='score']='" + similarity.computeNorm("a_t",state) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte // test that ord and rord are working on a global index basis, not just // at the segment level (since Lucene 2.9 has switched to per-segment searching) @@ -322,17 +326,18 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { assertU(adoc("id",""+i, "text","batman")); } assertU(commit()); - assertU(adoc("id","120", "text","batman superman")); // in a segment by itself + assertU(adoc("id","120", "text","batman superman")); // in a smaller segment + assertU(adoc("id","121", "text","superman")); assertU(commit()); - // batman and superman have the same idf in single-doc segment, but very different in the complete index. 
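A note on the norm change in the TestFunctionQuery hunk above: the removed Similarity.lengthNorm(field, numTokens) call is replaced by computeNorm(field, FieldInvertState), with the test building the state by hand so the expected score matches what indexing would have produced. A minimal sketch of that pattern, assuming the trunk-era DefaultSimilarity the test already uses (the field name and length are illustrative, taken from the hunk):

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.search.Similarity;

    public class NormSketch {
      public static void main(String[] args) {
        Similarity similarity = new DefaultSimilarity();
        // Describe a field with 4 indexed tokens and no extra boost,
        // mirroring the state the test constructs for a_t.
        FieldInvertState state = new FieldInvertState();
        state.setBoost(1.0f);
        state.setLength(4);
        // computeNorm replaces the old lengthNorm(field, numTokens); for
        // DefaultSimilarity this is boost / sqrt(length) == 0.5 here.
        float norm = similarity.computeNorm("a_t", state);
        System.out.println("norm(a_t) = " + norm);
      }
    }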
+ // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index String q ="{!func}query($qq)"; String fq="id:120"; assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); // test weighting through a function range query - assertQ(req("fl","*,score", "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); + assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); // test weighting through a complex function q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)"; @@ -356,6 +361,14 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { // OK } + // test that sorting by function weights correctly. superman should sort higher than batman due to idf of the whole index + + assertQ(req("q", "*:*", "fq","id:120 OR id:121", "sort","{!func v=$sortfunc} desc", "sortfunc","query($qq)", "qq","text:(batman OR superman)") + ,"*//doc[1]/float[.='120.0']" + ,"*//doc[2]/float[.='121.0']" + ); + + purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity } diff --git a/solr/src/test/org/apache/solr/servlet/CacheHeaderTest.java b/solr/src/test/org/apache/solr/servlet/CacheHeaderTest.java index 3c400f02154..76b92c082c3 100644 --- a/solr/src/test/org/apache/solr/servlet/CacheHeaderTest.java +++ b/solr/src/test/org/apache/solr/servlet/CacheHeaderTest.java @@ -85,6 +85,7 @@ public class CacheHeaderTest extends CacheHeaderTestBase { } } + @Override protected void doLastModified(String method) throws Exception { // We do a first request to get the last modified // This must result in a 200 OK response @@ -140,6 +141,7 @@ public class CacheHeaderTest extends CacheHeaderTestBase { } // test ETag + @Override protected void doETag(String method) throws Exception { HttpMethodBase get = getSelectMethod(method); getClient().executeMethod(get); @@ -211,6 +213,7 @@ public class CacheHeaderTest extends CacheHeaderTestBase { .getStatusCode()); } + @Override protected void doCacheControl(String method) throws Exception { if ("POST".equals(method)) { HttpMethodBase m = getSelectMethod(method); diff --git a/solr/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java b/solr/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java index 4a20f82a5ed..bb4031e66f8 100644 --- a/solr/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java +++ b/solr/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java @@ -24,7 +24,9 @@ import org.apache.solr.util.AbstractSolrTestCase; public class DirectSolrConnectionTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "solr/crazy-path-to-schema.xml"; } + @Override public String getSolrConfigFile() { return "solr/crazy-path-to-config.xml"; } DirectSolrConnection direct; diff --git a/solr/src/test/org/apache/solr/servlet/NoCacheHeaderTest.java b/solr/src/test/org/apache/solr/servlet/NoCacheHeaderTest.java index b6bf2adaa3b..e1d5bc696e0 100644 --- a/solr/src/test/org/apache/solr/servlet/NoCacheHeaderTest.java +++ b/solr/src/test/org/apache/solr/servlet/NoCacheHeaderTest.java @@ -35,18 +35,21 @@ public class NoCacheHeaderTest extends CacheHeaderTestBase { } // The tests + @Override @Test public void testLastModified() throws Exception { doLastModified("GET"); doLastModified("HEAD"); } + @Override @Test public void testEtag() throws Exception { doETag("GET"); doETag("HEAD"); } 
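Elsewhere in this patch (the searcher refcount assertions at the top of this section, and DirectUpdateHandlerTest further down) SolrIndexReader.getLeafReaders() gives way to Lucene's ReaderContext API: SolrIndexSearcher.getTopReaderContext() hands back the top-level context and ReaderUtil.leaves() flattens it to per-segment readers. A rough sketch of that access pattern, assuming the trunk ReaderContext inner classes these hunks import; the SolrQueryRequest is assumed to come from the req(...) test helper as in the tests:

    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.index.IndexReader.ReaderContext;
    import org.apache.lucene.util.ReaderUtil;
    import org.apache.solr.request.SolrQueryRequest;

    // sr is assumed to come from req("q", "foo"), as in the tests above.
    static void dumpLeaves(SolrQueryRequest sr) {
      ReaderContext top = sr.getSearcher().getTopReaderContext();
      // top.isAtomic is what DirectUpdateHandlerTest now checks instead of
      // counting getLeafReaders(): false means more than a single segment.
      AtomicReaderContext[] leaves = ReaderUtil.leaves(top);
      for (AtomicReaderContext leaf : leaves) {
        System.out.println(leaf.reader + " numDocs=" + leaf.reader.numDocs());
      }
    }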
+ @Override @Test public void testCacheControl() throws Exception { doCacheControl("GET"); @@ -54,6 +57,7 @@ public class NoCacheHeaderTest extends CacheHeaderTestBase { doCacheControl("POST"); } + @Override protected void doLastModified(String method) throws Exception { // We do a first request to get the last modified // This must result in a 200 OK response @@ -105,6 +109,7 @@ public class NoCacheHeaderTest extends CacheHeaderTestBase { } // test ETag + @Override protected void doETag(String method) throws Exception { HttpMethodBase get = getSelectMethod(method); getClient().executeMethod(get); @@ -153,6 +158,7 @@ public class NoCacheHeaderTest extends CacheHeaderTestBase { .getStatusCode()); } + @Override protected void doCacheControl(String method) throws Exception { HttpMethodBase m = getSelectMethod(method); getClient().executeMethod(m); diff --git a/solr/src/test/org/apache/solr/servlet/SolrRequestParserTest.java b/solr/src/test/org/apache/solr/servlet/SolrRequestParserTest.java index cc763c885b6..6f7cb427282 100644 --- a/solr/src/test/org/apache/solr/servlet/SolrRequestParserTest.java +++ b/solr/src/test/org/apache/solr/servlet/SolrRequestParserTest.java @@ -74,7 +74,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 { List streams = new ArrayList(); SolrQueryRequest req = parser.buildRequestFrom( core, new MultiMapSolrParams( args ), streams ); assertEquals( 1, streams.size() ); - assertEquals( body1, IOUtils.toString( streams.get(0).getStream() ) ); + assertEquals( body1, IOUtils.toString( streams.get(0).getReader() ) ); req.close(); // Now add three and make sure they come out ok @@ -87,9 +87,9 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 { input.add( body1 ); input.add( body2 ); input.add( body3 ); - output.add( IOUtils.toString( streams.get(0).getStream() ) ); - output.add( IOUtils.toString( streams.get(1).getStream() ) ); - output.add( IOUtils.toString( streams.get(2).getStream() ) ); + output.add( IOUtils.toString( streams.get(0).getReader() ) ); + output.add( IOUtils.toString( streams.get(1).getReader() ) ); + output.add( IOUtils.toString( streams.get(2).getReader() ) ); // sort them so the output is consistent Collections.sort( input ); Collections.sort( output ); @@ -112,13 +112,13 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 { { boolean ok = false; String url = "http://www.apache.org/dist/lucene/solr/"; - String txt = null; + byte[] bytes = null; try { URLConnection connection = new URL(url).openConnection(); connection.setConnectTimeout(5000); connection.setReadTimeout(5000); connection.connect(); - txt = IOUtils.toString( connection.getInputStream()); + bytes = IOUtils.toByteArray( connection.getInputStream()); } catch( Exception ex ) { assumeNoException("Unable to connect to " + url + " to run the test.", ex); @@ -134,7 +134,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 { List streams = new ArrayList(); SolrQueryRequest req = parser.buildRequestFrom( core, new MultiMapSolrParams( args ), streams ); assertEquals( 1, streams.size() ); - assertEquals( txt, IOUtils.toString( streams.get(0).getStream() ) ); + assertArrayEquals( bytes, IOUtils.toByteArray( streams.get(0).getStream() ) ); req.close(); } diff --git a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java index 41b64532a8b..7e93afb8358 100644 --- a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java +++ 
b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java @@ -62,7 +62,7 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 { RefCounted searcher = core.getSearcher(); Collection tokens = queryConverter.convert("fob"); - SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getReader()); + SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader()); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); Map suggestions = result.get(tokens.iterator().next()); diff --git a/solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java b/solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java index 1230e78aa0b..3c536058f1f 100644 --- a/solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java +++ b/solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java @@ -78,7 +78,7 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 { RefCounted searcher = core.getSearcher(); Collection tokens = queryConverter.convert("fob"); - SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getReader()); + SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader()); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); Map suggestions = result.get(tokens.iterator().next()); @@ -117,7 +117,7 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 { RefCounted searcher = core.getSearcher(); Collection tokens = queryConverter.convert("Solar"); - SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getReader()); + SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader()); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer @@ -160,7 +160,7 @@ public class FileBasedSpellCheckerTest extends SolrTestCaseJ4 { RefCounted searcher = core.getSearcher(); Collection tokens = queryConverter.convert("solar"); - SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getReader()); + SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader()); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer diff --git a/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java b/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java index d7f8edfb95b..82087c023b5 100644 --- a/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java +++ b/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java @@ -121,7 +121,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 { try { checker.build(core, searcher); - IndexReader reader = searcher.getReader(); + IndexReader reader = searcher.getIndexReader(); Collection tokens = queryConverter.convert("documemt"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader); SpellingResult result = checker.getSuggestions(spellOpts); @@ -196,7 +196,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 { try { checker.build(core, searcher); - IndexReader reader = searcher.getReader(); + IndexReader reader = searcher.getIndexReader(); 
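The spell checker tests above swap SolrIndexSearcher.getReader() for getIndexReader(), since the searcher no longer exposes a SolrIndexReader wrapper. A condensed sketch of the lookup those tests perform; the method name suggestFor and the parameter plumbing are illustrative, only the SpellingOptions/getIndexReader usage is taken from the hunks:

    import java.util.Collection;
    import org.apache.lucene.analysis.Token;
    import org.apache.solr.core.SolrCore;
    import org.apache.solr.search.SolrIndexSearcher;
    import org.apache.solr.spelling.QueryConverter;
    import org.apache.solr.spelling.SolrSpellChecker;
    import org.apache.solr.spelling.SpellingOptions;
    import org.apache.solr.spelling.SpellingResult;
    import org.apache.solr.util.RefCounted;

    static SpellingResult suggestFor(SolrCore core, SolrSpellChecker checker,
                                     QueryConverter queryConverter, String word) throws Exception {
      RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
      try {
        Collection<Token> tokens = queryConverter.convert(word);
        // getIndexReader() replaces the removed SolrIndexSearcher.getReader().
        SpellingOptions opts = new SpellingOptions(tokens, searcher.get().getIndexReader());
        return checker.getSuggestions(opts);
      } finally {
        searcher.decref();
      }
    }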
Collection tokens = queryConverter.convert("documemt"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); @@ -227,6 +227,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 { } private class TestSpellChecker extends IndexBasedSpellChecker{ + @Override public SpellChecker getSpellChecker(){ return spellChecker; } @@ -284,8 +285,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 { Directory dir = newFSDirectory(altIndexDir); IndexWriter iw = new IndexWriter( dir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)). - setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH) + new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) ); for (int i = 0; i < ALT_DOCS.length; i++) { Document doc = new Document(); @@ -309,7 +309,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 { try { checker.build(core, searcher); - IndexReader reader = searcher.getReader(); + IndexReader reader = searcher.getIndexReader(); Collection tokens = queryConverter.convert("flesh"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); diff --git a/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java b/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java index 34a950950c3..b70ba1fb95a 100644 --- a/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java +++ b/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java @@ -22,16 +22,17 @@ import java.util.Map; import org.apache.lucene.analysis.Token; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.spelling.PossibilityIterator; -import org.junit.BeforeClass; +import org.junit.Before; import org.junit.Test; public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 { private static Map> suggestions = new LinkedHashMap>(); - @BeforeClass - public static void beforeClass() throws Exception { - + @Override + @Before + public void setUp() throws Exception { + super.setUp(); suggestions.clear(); LinkedHashMap AYE = new LinkedHashMap(); @@ -72,7 +73,7 @@ public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 { suggestions.put(new Token("BEE", 0, 2), BEE); suggestions.put(new Token("CEE", 0, 2), CEE); } - + @Test public void testSpellPossibilityIterator() throws Exception { PossibilityIterator iter = new PossibilityIterator(suggestions); diff --git a/solr/src/test/org/apache/solr/update/AutoCommitTest.java b/solr/src/test/org/apache/solr/update/AutoCommitTest.java index e121423394c..cb0afdded27 100644 --- a/solr/src/test/org/apache/solr/update/AutoCommitTest.java +++ b/solr/src/test/org/apache/solr/update/AutoCommitTest.java @@ -78,7 +78,9 @@ class CommitListener implements SolrEventListener { public class AutoCommitTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } /** diff --git a/solr/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java b/solr/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java index 98f92553e7d..2f425545d65 100644 --- a/solr/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java +++ b/solr/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java @@ -32,10 
+32,12 @@ import java.io.FileFilter; **/ public class DirectUpdateHandlerOptimizeTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema12.xml"; } + @Override public String getSolrConfigFile() { // return "solrconfig-duh-optimize.xml"; return "solrconfig.xml"; diff --git a/solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java b/solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java index 2e05af23d3d..654d474d161 100644 --- a/solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java +++ b/solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java @@ -20,18 +20,13 @@ package org.apache.solr.update; import java.util.HashMap; import java.util.Map; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexReader; import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.search.SolrIndexReader; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -47,6 +42,7 @@ public class DirectUpdateHandlerTest extends SolrTestCaseJ4 { initCore("solrconfig.xml", "schema12.xml"); } + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -243,18 +239,18 @@ public class DirectUpdateHandlerTest extends SolrTestCaseJ4 { assertU(commit()); SolrQueryRequest sr = req("q","foo"); - SolrIndexReader r = sr.getSearcher().getReader(); + IndexReader r = sr.getSearcher().getTopReaderContext().reader; assertTrue(r.maxDoc() > r.numDocs()); // should have deletions - assertTrue(r.getLeafReaders().length > 1); // more than 1 segment + assertFalse(r.getTopReaderContext().isAtomic); // more than 1 segment sr.close(); assertU(commit("expungeDeletes","true")); sr = req("q","foo"); - r = sr.getSearcher().getReader(); + r = sr.getSearcher().getTopReaderContext().reader; assertEquals(r.maxDoc(), r.numDocs()); // no deletions assertEquals(4,r.maxDoc()); // no dups - assertTrue(r.getLeafReaders().length > 1); // still more than 1 segment + assertFalse(r.getTopReaderContext().isAtomic); //still more than 1 segment sr.close(); } diff --git a/solr/src/test/org/apache/solr/update/TestIndexingPerformance.java b/solr/src/test/org/apache/solr/update/TestIndexingPerformance.java index f513a94ca1d..51a55c4aaf1 100755 --- a/solr/src/test/org/apache/solr/update/TestIndexingPerformance.java +++ b/solr/src/test/org/apache/solr/update/TestIndexingPerformance.java @@ -41,7 +41,9 @@ public class TestIndexingPerformance extends AbstractSolrTestCase { public static final Logger log = LoggerFactory.getLogger(TestIndexingPerformance.class); + @Override public String getSchemaFile() { return "schema12.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig_perf.xml"; } public void testIndexingPerf() throws IOException { diff --git a/solr/src/test/org/apache/solr/update/processor/SignatureUpdateProcessorFactoryTest.java b/solr/src/test/org/apache/solr/update/processor/SignatureUpdateProcessorFactoryTest.java index 9b07cab1312..7bd622b6074 100755 --- a/solr/src/test/org/apache/solr/update/processor/SignatureUpdateProcessorFactoryTest.java +++ 
b/solr/src/test/org/apache/solr/update/processor/SignatureUpdateProcessorFactoryTest.java @@ -62,7 +62,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 { void checkNumDocs(int n) { SolrQueryRequest req = req(); try { - assertEquals(n, req.getSearcher().getReader().numDocs()); + assertEquals(n, req.getSearcher().getIndexReader().numDocs()); } finally { req.close(); } @@ -129,6 +129,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 { for (int i = 0; i < threads.length; i++) { threads[i] = new Thread() { + @Override public void run() { for (int i = 0; i < 30; i++) { // h.update(adoc("id", Integer.toString(1+ i), "v_t", @@ -150,6 +151,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 { for (int i = 0; i < threads2.length; i++) { threads2[i] = new Thread() { + @Override public void run() { for (int i = 0; i < 10; i++) { // h.update(adoc("id" , Integer.toString(1+ i + 10000), "v_t", diff --git a/solr/src/test/org/apache/solr/util/AbstractSolrTestCase.java b/solr/src/test/org/apache/solr/util/AbstractSolrTestCase.java index fec67be17b0..c9ecd2f3038 100644 --- a/solr/src/test/org/apache/solr/util/AbstractSolrTestCase.java +++ b/solr/src/test/org/apache/solr/util/AbstractSolrTestCase.java @@ -111,6 +111,7 @@ public abstract class AbstractSolrTestCase extends LuceneTestCase { public static Logger log = LoggerFactory.getLogger(AbstractSolrTestCase.class); private String factoryProp; + @Override public void setUp() throws Exception { super.setUp(); log.info("####SETUP_START " + getName()); @@ -170,6 +171,7 @@ public abstract class AbstractSolrTestCase extends LuceneTestCase { * to delete dataDir, unless the system property "solr.test.leavedatadir" * is set. */ + @Override public void tearDown() throws Exception { log.info("####TEARDOWN_START " + getName()); if (factoryProp == null) { @@ -403,6 +405,7 @@ public abstract class AbstractSolrTestCase extends LuceneTestCase { /** Neccessary to make method signatures un-ambiguous */ public static class Doc { public String xml; + @Override public String toString() { return xml; } } diff --git a/solr/src/test/org/apache/solr/util/TestNumberUtils.java b/solr/src/test/org/apache/solr/util/TestNumberUtils.java index 0e6c915697a..cc462944093 100644 --- a/solr/src/test/org/apache/solr/util/TestNumberUtils.java +++ b/solr/src/test/org/apache/solr/util/TestNumberUtils.java @@ -182,72 +182,88 @@ abstract class Converter { } class Int2Int extends Converter { + @Override public String toInternal(String val) { return Integer.toString(Integer.parseInt(val)); } + @Override public String toExternal(String val) { return Integer.toString(Integer.parseInt(val)); } } class SortInt extends Converter { + @Override public String toInternal(String val) { return NumberUtils.int2sortableStr(val); } + @Override public String toExternal(String val) { return NumberUtils.SortableStr2int(val); } } class SortLong extends Converter { + @Override public String toInternal(String val) { return NumberUtils.long2sortableStr(val); } + @Override public String toExternal(String val) { return NumberUtils.SortableStr2long(val); } } class Float2Float extends Converter { + @Override public String toInternal(String val) { return Float.toString(Float.parseFloat(val)); } + @Override public String toExternal(String val) { return Float.toString(Float.parseFloat(val)); } } class SortFloat extends Converter { + @Override public String toInternal(String val) { return NumberUtils.float2sortableStr(val); } + @Override public String 
toExternal(String val) { return NumberUtils.SortableStr2floatStr(val); } } class SortDouble extends Converter { + @Override public String toInternal(String val) { return NumberUtils.double2sortableStr(val); } + @Override public String toExternal(String val) { return NumberUtils.SortableStr2doubleStr(val); } } class Base100S extends Converter { + @Override public String toInternal(String val) { return BCDUtils.base10toBase100SortableInt(val); } + @Override public String toExternal(String val) { return BCDUtils.base100SortableIntToBase10(val); } } class Base10kS extends Converter { + @Override public String toInternal(String val) { return BCDUtils.base10toBase10kSortableInt(val); } + @Override public String toExternal(String val) { return BCDUtils.base10kSortableIntToBase10(val); } diff --git a/solr/src/test/org/apache/solr/velocity/VelocityResponseWriterTest.java b/solr/src/test/org/apache/solr/velocity/VelocityResponseWriterTest.java index 911ca19b1ad..ae220a0a852 100644 --- a/solr/src/test/org/apache/solr/velocity/VelocityResponseWriterTest.java +++ b/solr/src/test/org/apache/solr/velocity/VelocityResponseWriterTest.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.solr.velocity; import org.apache.solr.response.SolrQueryResponse; @@ -9,7 +26,9 @@ import java.io.StringWriter; import java.io.IOException; public class VelocityResponseWriterTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + @Override public String getSolrConfigFile() { return "solrconfig.xml"; } diff --git a/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java b/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java index e272d5e973f..e8996e26346 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java @@ -36,6 +36,7 @@ import java.util.logging.Logger; * @since solr 1.3 */ public final class LogLevelSelection extends HttpServlet { + @Override public void init() throws ServletException { } @@ -43,6 +44,7 @@ public final class LogLevelSelection extends HttpServlet { * Processes an HTTP GET request and changes the logging level as * specified. 
*/ + @Override public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { @@ -147,6 +149,7 @@ public final class LogLevelSelection extends HttpServlet { } + @Override public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { @@ -259,6 +262,7 @@ public final class LogLevelSelection extends HttpServlet { return name.compareTo(((LogWrapper) other).name); } + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -275,6 +279,7 @@ public final class LogLevelSelection extends HttpServlet { return true; } + @Override public int hashCode() { final int prime = 31; int result = 1; diff --git a/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java index 93bdddc1720..4250dd4e41a 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java @@ -65,7 +65,7 @@ public class SolrDispatchFilter implements Filter public SolrDispatchFilter() { try { - adminRequestParser = new SolrRequestParsers(new Config(null,"solr",new ByteArrayInputStream("".getBytes()),"") ); + adminRequestParser = new SolrRequestParsers(new Config(null,"solr",new ByteArrayInputStream("".getBytes("UTF-8")),"") ); } catch (Exception e) { //unlikely throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,e); @@ -315,7 +315,9 @@ public class SolrDispatchFilter implements Filter sendError((HttpServletResponse) response, solrRsp.getException()); } else { // Now write it out - response.setContentType(responseWriter.getContentType(solrReq, solrRsp)); + final String ct = responseWriter.getContentType(solrReq, solrRsp); + // don't call setContentType on null + if (null != ct) response.setContentType(ct); if (Method.HEAD != reqMethod) { if (responseWriter instanceof BinaryQueryResponseWriter) { BinaryQueryResponseWriter binWriter = (BinaryQueryResponseWriter) responseWriter; diff --git a/solr/src/webapp/src/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java b/solr/src/webapp/src/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java index 6169a5cb095..ce1d55680be 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java @@ -18,6 +18,7 @@ package org.apache.solr.servlet.cache; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Collections; import java.util.Map; import java.util.WeakHashMap; @@ -75,11 +76,15 @@ public final class HttpCacheHeaderUtil { if (currentIndexVersion != indexVersionCache) { indexVersionCache=currentIndexVersion; - etagCache = "\"" - + new String(Base64.encodeBase64((Long.toHexString - (Long.reverse(indexVersionCache)) - + etagSeed).getBytes())) - + "\""; + try { + etagCache = "\"" + + new String(Base64.encodeBase64((Long.toHexString + (Long.reverse(indexVersionCache)) + + etagSeed).getBytes()), "US-ASCII") + + "\""; + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // may not happen + } } return etagCache; @@ -95,7 +100,7 @@ public final class HttpCacheHeaderUtil { public static String calcEtag(final SolrQueryRequest solrReq) { final SolrCore core = solrReq.getCore(); final long currentIndexVersion - = solrReq.getSearcher().getReader().getVersion(); + = solrReq.getSearcher().getIndexReader().getVersion(); EtagCacheVal etagCache = 
etagCoreCache.get(core); if (null == etagCache) { @@ -152,7 +157,7 @@ public final class HttpCacheHeaderUtil { // assume default, change if needed (getOpenTime() should be fast) lastMod = LastModFrom.DIRLASTMOD == lastModFrom - ? IndexReader.lastModified(searcher.getReader().directory()) + ? IndexReader.lastModified(searcher.getIndexReader().directory()) : searcher.getOpenTime(); } catch (IOException e) { // we're pretty freaking screwed if this happens diff --git a/solr/src/webapp/web/admin/analysis.jsp b/solr/src/webapp/web/admin/analysis.jsp index 347691e4d0b..89dbd71d677 100644 --- a/solr/src/webapp/web/admin/analysis.jsp +++ b/solr/src/webapp/web/admin/analysis.jsp @@ -24,6 +24,7 @@ org.apache.lucene.analysis.CharReader, org.apache.lucene.analysis.CharStream, org.apache.lucene.analysis.tokenattributes.*, + org.apache.lucene.util.AttributeReflector, org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, @@ -31,7 +32,8 @@ org.apache.solr.schema.FieldType, org.apache.solr.schema.SchemaField, org.apache.solr.common.util.XML, - javax.servlet.jsp.JspWriter,java.io.IOException + javax.servlet.jsp.JspWriter,java.io.IOException, + org.apache.noggit.CharArr "%> <%@ page import="java.io.Reader"%> <%@ page import="java.io.StringReader"%> @@ -39,8 +41,6 @@ <%@ page import="java.math.BigInteger" %> <%-- $Id$ --%> -<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%> -<%-- $Name: $ --%> <%@include file="header.jsp" %> @@ -71,19 +71,19 @@ @@ -115,7 +115,7 @@ @@ -148,24 +148,28 @@ } if (field!=null) { - HashSet matches = null; + HashSet matches = null; if (qval!="" && highlight) { Reader reader = new StringReader(qval); Analyzer analyzer = field.getType().getQueryAnalyzer(); TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader); + TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); tstream.reset(); - List tokens = getTokens(tstream); - matches = new HashSet(); - for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); } + matches = new HashSet(); + while (tstream.incrementToken()) { + final BytesRef bytes = new BytesRef(); + bytesAtt.toBytesRef(bytes); + matches.add(bytes); + } } if (val!="") { out.println("

    Index Analyzer

    "); - doAnalyzer(out, field, val, false, verbose,matches); + doAnalyzer(out, field, val, false, verbose, matches); } if (qval!="") { out.println("

    Query Analyzer

    "); - doAnalyzer(out, field, qval, true, qverbose,null); + doAnalyzer(out, field, qval, true, qverbose, null); } } @@ -177,7 +181,7 @@ <%! - private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { + private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { FieldType ft = field.getType(); Analyzer analyzer = queryAnalyser ? @@ -240,7 +244,7 @@ tstream.reset(); List tokens = getTokens(tstream); if (verbose) { - writeHeader(out, analyzer.getClass(), new HashMap()); + writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP); } writeTokens(out, tokens, ft, verbose, match); } @@ -249,52 +253,59 @@ static List getTokens(TokenStream tstream) throws IOException { List tokens = new ArrayList(); - - while (true) { - if (!tstream.incrementToken()) - break; - else { - tokens.add(tstream.cloneAttributes()); - } + tstream.reset(); + while (tstream.incrementToken()) { + tokens.add(tstream.cloneAttributes()); } return tokens; } - + private static class ReflectItem { + final Class attClass; + final String key; + final Object value; + + ReflectItem(Class attClass, String key, Object value) { + this.attClass = attClass; + this.key = key; + this.value = value; + } + } + private static class Tok { - AttributeSource token; - int pos; - Tok(AttributeSource token, int pos) { - this.token=token; - this.pos=pos; - } - - public boolean equals(Object o) { - return ((Tok)o).token.toString().equals(token.toString()); - } - public int hashCode() { - return token.toString().hashCode(); - } - public String toString() { - return token.toString(); - } - public String toPrintableString() { - TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class); - if (att instanceof CharTermAttribute) - return att.toString(); - else { - BytesRef bytes = new BytesRef(); - att.toBytesRef(bytes); - return bytes.toString(); - } + final BytesRef bytes = new BytesRef(); + final String rawText, text; + final int pos; + final List reflected = new ArrayList(); + + Tok(AttributeSource token, int pos, FieldType ft) { + this.pos = pos; + token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes); + rawText = (token.hasAttribute(CharTermAttribute.class)) ? 
+ token.getAttribute(CharTermAttribute.class).toString() : null; + final CharArr textBuf = new CharArr(bytes.length); + ft.indexedToReadable(bytes, textBuf); + text = textBuf.toString(); + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and raw term + if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) + return; + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + reflected.add(new ReflectItem(attClass, key, value)); + } + }); } } - private static interface ToStr { - public String toStr(Object o); + private static interface TokToStr { + public String toStr(Tok o); } - private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set match) throws IOException { + private static void printRow(JspWriter out, String header, String headerTitle, List[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set match) throws IOException { // find the maximum number of terms for any position int maxSz=1; if (multival) { @@ -308,7 +319,13 @@ out.println("
    "); if (idx==0 && verbose) { if (header != null) { - out.print(""); } @@ -317,7 +334,7 @@ for (int posIndex=0; posIndex lst = arrLst[posIndex]; if (lst.size() <= idx) continue; - if (match!=null && match.contains(lst.get(idx))) { + if (match!=null && match.contains(lst.get(idx).bytes)) { out.print("
    - Field + Field - +
    - Field value (Index) + Field value (Index)
    verbose output >
    - +
    - Field value (Query) + Field value (Query)
    verbose output >
    - +
    - +
    "); + out.print(""); XML.escapeCharData(header,out); out.println(" args) throws IOException { out.print("

    "); out.print(clazz.getName()); @@ -359,137 +367,93 @@ // readable, raw, pos, type, start/end - static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { + static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { // Use a map to tell what tokens are in what positions // because some tokenizers/filters may do funky stuff with // very large increments, or negative increments. HashMap> map = new HashMap>(); boolean needRaw=false; - int pos=0; + int pos=0, reflectionCount = -1; for (AttributeSource t : tokens) { - if (!t.toString().equals(ft.indexedToReadable(t.toString()))) { - needRaw=true; - } - pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); List lst = map.get(pos); if (lst==null) { lst = new ArrayList(1); map.put(pos,lst); } - Tok tok = new Tok(t,pos); + Tok tok = new Tok(t,pos,ft); + // sanity check + if (reflectionCount < 0) { + reflectionCount = tok.reflected.size(); + } else { + if (reflectionCount != tok.reflected.size()) + throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos); + } + if (tok.rawText != null && !tok.text.equals(tok.rawText)) { + needRaw=true; + } lst.add(tok); } List[] arr = (List[])map.values().toArray(new ArrayList[map.size()]); - /* Jetty 6.1.3 miscompiles this generics version... - Arrays.sort(arr, new Comparator>() { - public int compare(List toks, List toks1) { - return toks.get(0).pos - toks1.get(0).pos; - } - } - */ - + // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics: Arrays.sort(arr, new Comparator() { public int compare(Object toks, Object toks1) { return ((List)toks).get(0).pos - ((List)toks1).get(0).pos; } - } - - - ); + }); out.println(""); if (verbose) { - printRow(out,"term position", arr, new ToStr() { - public String toStr(Object o) { - return Integer.toString(((Tok)o).pos); + printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return Integer.toString(t.pos); } - } - ,false - ,verbose - ,null); + },false,verbose,null); } - - printRow(out,"term text", arr, new ToStr() { - public String toStr(Object o) { - return ft.indexedToReadable( ((Tok)o).toPrintableString() ); + printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return t.text; } - } - ,true - ,verbose - ,match - ); - - if (needRaw) { - printRow(out,"raw text", arr, new ToStr() { - public String toStr(Object o) { - // page is UTF-8, so anything goes. - return ((Tok)o).toPrintableString(); - } - } - ,true - ,verbose - ,match - ); - } + },true,verbose,match); if (verbose) { - printRow(out,"term type", arr, new ToStr() { - public String toStr(Object o) { - String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type(); - if (tt == null) { - return "null"; - } else { - return tt; + if (needRaw) { + printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + // page is UTF-8, so anything goes. + return (t.rawText == null) ? 
"" : t.rawText; } - } + },true,verbose,match); } - ,true - ,verbose, - null - ); - } - - if (verbose) { - printRow(out,"source start,end", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ; + + printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return t.bytes.toString(); } - } - ,true - ,verbose - ,null - ); - } + },true,verbose,match); - if (verbose) { - printRow(out,"payload", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - Payload p = t.addAttribute(PayloadAttribute.class).getPayload(); - if( null != p ) { - BigInteger bi = new BigInteger( p.getData() ); - String ret = bi.toString( 16 ); - if (ret.length() % 2 != 0) { - // Pad with 0 - ret = "0"+ret; + for (int att=0; att < reflectionCount; att++) { + final ReflectItem item0 = arr[0].get(0).reflected.get(att); + final int i = att; + printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + final ReflectItem item = t.reflected.get(i); + if (item0.attClass != item.attClass || !item0.key.equals(item.key)) + throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos); + if (item.value instanceof Payload) { + final Payload p = (Payload) item.value; + return new BytesRef(p.getData()).toString(); + } else { + return (item.value != null) ? item.value.toString() : ""; } - ret += isPayloadString( p ); - return ret; } - return ""; - } + },true,verbose, null); } - ,true - ,verbose - ,null - ); } out.println("
    "); diff --git a/solr/src/webapp/web/admin/index.jsp b/solr/src/webapp/web/admin/index.jsp index b38c6884b82..a34a2b0c876 100644 --- a/solr/src/webapp/web/admin/index.jsp +++ b/solr/src/webapp/web/admin/index.jsp @@ -39,10 +39,10 @@

    <% if (null != core.getSchemaResource()) { %> - [Schema] + [Schema] <% } if (null != core.getConfigResource()) { %> - [Config] + [Config] <% } %> [Analysis] [Schema Browser] <%if(replicationhandler){%>[Replication]<%}%> diff --git a/solr/src/webapp/web/admin/replication/header.jsp b/solr/src/webapp/web/admin/replication/header.jsp index 30e54454d1f..aaf9954b78c 100644 --- a/solr/src/webapp/web/admin/replication/header.jsp +++ b/solr/src/webapp/web/admin/replication/header.jsp @@ -19,7 +19,7 @@ <%@ page import="org.apache.solr.common.util.NamedList, org.apache.solr.common.util.SimpleOrderedMap, org.apache.solr.request.LocalSolrQueryRequest, - org.apache.solr.request.SolrQueryResponse, + org.apache.solr.response.SolrQueryResponse, org.apache.solr.request.SolrRequestHandler, java.util.Map"%> <%@ page import="org.apache.solr.handler.ReplicationHandler" %>
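Stepping back to the largest hunk in this section, admin/analysis.jsp: the rewrite drops the hand-rolled rows for term type, offsets and payload in favour of one generic pass over every attribute via AttributeReflector, and it keys match highlighting on the indexed bytes from TermToBytesRefAttribute rather than on toString(). A trimmed sketch of that reflection pass, assuming the trunk attribute API the page now imports; the console output stands in for the JSP's table markup:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.Attribute;
    import org.apache.lucene.util.AttributeReflector;
    import org.apache.lucene.util.BytesRef;

    static void dumpTokens(TokenStream tstream) throws IOException {
      TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
      tstream.reset();
      while (tstream.incrementToken()) {
        // Indexed term bytes: the value the page now uses for match highlighting.
        final BytesRef bytes = new BytesRef();
        bytesAtt.toBytesRef(bytes);
        System.out.println("term bytes: " + bytes);
        // Let every attribute report its key/value pairs instead of
        // addAttribute()-ing each known attribute type by hand.
        tstream.reflectWith(new AttributeReflector() {
          public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            System.out.println("  " + attClass.getName() + "#" + key + " = " + value);
          }
        });
      }
      tstream.end();
      tstream.close();
    }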